{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Environment setup\n",
    "### Tutorial reference: https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_adaptive_rag_local/\n",
    "# One-time dependency install — intentionally left commented out after the first run.\n",
    "# %capture --no-stderr\n",
    "# %pip install -U langchain-nomic langchain_community tiktoken langchainhub chromadb langchain langgraph tavily-python"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ollama model name — the model must already be available locally\n",
    "# (presumably pulled beforehand, e.g. `ollama pull mistral`).\n",
    "local_llm = \"mistral\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# LangSmith tracing configuration. Tracing is disabled here;\n",
    "# flip LANGCHAIN_TRACING_V2 to \"true\" to log runs to LangSmith.\n",
    "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"false\"\n",
    "os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
    "# Never hardcode credentials in a notebook. setdefault() keeps an API key\n",
    "# that was already exported in the shell environment instead of clobbering\n",
    "# it with an empty string (the previous unconditional assignment did).\n",
    "os.environ.setdefault(\"LANGCHAIN_API_KEY\", \"\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3\n",
      "Document 0[1]:\n",
      "['Config', '__abstractmethods__', '__annotations__', '__class__', '__class_vars__', '__config__', '__custom_root_type__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__exclude_fields__', '__fields__', '__fields_set__', '__format__', '__ge__', '__get_validators__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__include_fields__', '__init__', '__init_subclass__', '__iter__', '__json_encoder__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__post_root_validators__', '__pre_root_validators__', '__pretty__', '__private_attributes__', '__reduce__', '__reduce_ex__', '__repr__', '__repr_args__', '__repr_name__', '__repr_str__', '__rich_repr__', '__schema_cache__', '__setattr__', '__setstate__', '__signature__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__try_update_forward_refs__', '__validators__', '_abc_impl', '_calculate_keys', '_copy_and_set_values', '_decompose_class', '_enforce_dict_if_root', '_get_value', '_init_private_attributes', '_iter', 'construct', 'copy', 'dict', 'from_orm', 'get_lc_namespace', 'is_lc_serializable', 'json', 'lc_attributes', 'lc_id', 'lc_secrets', 'metadata', 'page_content', 'parse_file', 'parse_obj', 'parse_raw', 'schema', 'schema_json', 'to_json', 'to_json_not_implemented', 'type', 'update_forward_refs', 'validate']\n",
      "page_content='\\n\\n\\n\\n\\n\\nLLM Powered Autonomous Agents | Lil\\'Log\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLil\\'Log\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nPosts\\n\\n\\n\\n\\nArchive\\n\\n\\n\\n\\nSearch\\n\\n\\n\\n\\nTags\\n\\n\\n\\n\\nFAQ\\n\\n\\n\\n\\nemojisearch.app\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n      LLM Powered Autonomous Agents\\n    \\nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\\n\\n\\n \\n\\n\\nTable of Contents\\n\\n\\n\\nAgent System Overview\\n\\nComponent One: Planning\\n\\nTask Decomposition\\n\\nSelf-Reflection\\n\\n\\nComponent Two: Memory\\n\\nTypes of Memory\\n\\nMaximum Inner Product Search (MIPS)\\n\\n\\nComponent Three: Tool Use\\n\\nCase Studies\\n\\nScientific Discovery Agent\\n\\nGenerative Agents Simulation\\n\\nProof-of-Concept Examples\\n\\n\\nChallenges\\n\\nCitation\\n\\nReferences\\n\\n\\n\\n\\n\\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. 
The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview#\\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\\n\\nPlanning\\n\\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\\n\\n\\nMemory\\n\\nShort-term memory: I would consider all the in-context learning (See Prompt Engineering) as utilizing short-term memory of the model to learn.\\nLong-term memory: This provides the agent with the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval.\\n\\n\\nTool use\\n\\nThe agent learns to call external APIs for extra information that is missing from the model weights (often hard to change after pre-training), including current information, code execution capability, access to proprietary information sources and more.\\n\\n\\n\\n\\nFig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\\nTree of Thoughts (Yao et al. 
2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.\\nAnother quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains.\\nSelf-Reflection#\\nSelf-reflection is a vital aspect that allows autonomous agents to improve iteratively by refining past action decisions and correcting previous mistakes. It plays a crucial role in real-world tasks where trial and error are inevitable.\\nReAct (Yao et al. 2023) integrates reasoning and acting within LLM by extending the action space to be a combination of task-specific discrete actions and the language space. The former enables LLM to interact with the environment (e.g. 
use Wikipedia search API), while the latter prompts LLM to generate reasoning traces in natural language.\\nThe ReAct prompt template incorporates explicit steps for LLM to think, roughly formatted as:\\nThought: ...\\nAction: ...\\nObservation: ...\\n... (Repeated many times)\\n\\nFig. 2.  Examples of reasoning trajectories for knowledge-intensive tasks (e.g. HotpotQA, FEVER) and decision-making tasks (e.g. AlfWorld Env, WebShop). (Image source: Yao et al. 2023).\\nIn both experiments on knowledge-intensive tasks and decision-making tasks, ReAct works better than the Act-only baseline where Thought: … step is removed.\\nReflexion (Shinn & Labash 2023) is a framework to equip agents with dynamic memory and self-reflection capabilities to improve reasoning skills. Reflexion has a standard RL setup, in which the reward model provides a simple binary reward and the action space follows the setup in ReAct where the task-specific action space is augmented with language to enable complex reasoning steps. After each action $a_t$, the agent computes a heuristic $h_t$ and optionally may decide to reset the environment to start a new trial depending on the self-reflection results.\\n\\nFig. 3. Illustration of the Reflexion framework. (Image source: Shinn & Labash, 2023)\\nThe heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.\\nSelf-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying LLM.\\n\\nFig. 4. Experiments on AlfWorld Env and HotpotQA. 
Hallucination is a more common failure than inefficient planning in AlfWorld. (Image source: Shinn & Labash, 2023)\\nChain of Hindsight (CoH; Liu et al. 2023) encourages the model to improve on its own outputs by explicitly presenting it with a sequence of past outputs, each annotated with feedback. Human feedback data is a collection of $D_h = \\\\{(x, y_i , r_i , z_i)\\\\}_{i=1}^n$, where $x$ is the prompt, each $y_i$ is a model completion, $r_i$ is the human rating of $y_i$, and $z_i$ is the corresponding human-provided hindsight feedback. Assume the feedback tuples are ranked by reward, $r_n \\\\geq r_{n-1} \\\\geq \\\\dots \\\\geq r_1$. The process is supervised fine-tuning where the data is a sequence in the form of $\\\\tau_h = (x, z_i, y_i, z_j, y_j, \\\\dots, z_n, y_n)$, where $1 \\\\leq i \\\\leq j \\\\leq n$. The model is finetuned to only predict $y_n$ conditioned on the sequence prefix, such that the model can self-reflect to produce better output based on the feedback sequence. The model can optionally receive multiple rounds of instructions with human annotators at test time.\\nTo avoid overfitting, CoH adds a regularization term to maximize the log-likelihood of the pre-training dataset. To avoid shortcutting and copying (because there are many common words in feedback sequences), they randomly mask 0% - 5% of past tokens during training.\\nThe training dataset in their experiments is a combination of WebGPT comparisons, summarization from human feedback and human preference dataset.\\n\\nFig. 5. After fine-tuning with CoH, the model can follow instructions to produce outputs with incremental improvement in a sequence. (Image source: Liu et al. 2023)\\nThe idea of CoH is to present a history of sequentially improved outputs in context and train the model to take on the trend to produce better outputs. Algorithm Distillation (AD; Laskin et al. 
2023) applies the same idea to cross-episode trajectories in reinforcement learning tasks, where an algorithm is encapsulated in a long history-conditioned policy. Considering that an agent interacts with the environment many times and in each episode the agent gets a little better, AD concatenates this learning history and feeds that into the model. Hence we should expect the next predicted action to lead to better performance than previous trials. The goal is to learn the process of RL instead of training a task-specific policy itself.\\n\\nFig. 6. Illustration of how Algorithm Distillation (AD) works. (Image source: Laskin et al. 2023).\\nThe paper hypothesizes that any algorithm that generates a set of learning histories can be distilled into a neural network by performing behavioral cloning over actions. The history data is generated by a set of source policies, each trained for a specific task. At the training stage, during each RL run, a random task is sampled and a subsequence of multi-episode history is used for training, such that the learned policy is task-agnostic.\\nIn reality, the model has limited context window length, so episodes should be short enough to construct multi-episode history. Multi-episodic contexts of 2-4 episodes are necessary to learn a near-optimal in-context RL algorithm. The emergence of in-context RL requires long enough context.\\nIn comparison with three baselines, including ED (expert distillation, behavior cloning with expert trajectories instead of learning history), source policy (used for generating trajectories for distillation by UCB), RL^2 (Duan et al. 2017; used as upper bound since it needs online RL), AD demonstrates in-context RL with performance getting close to RL^2 despite only using offline RL and learns much faster than other baselines. When conditioned on partial training history of the source policy, AD also improves much faster than ED baseline.\\n\\nFig. 7. 
Comparison of AD, ED, source policy and RL^2 on environments that require memory and exploration. Only binary reward is assigned. The source policies are trained with A3C for \"dark\" environments and DQN for watermaze.(Image source: Laskin et al. 2023)\\nComponent Two: Memory#\\n(Big thank you to ChatGPT for helping me draft this section. I’ve learned a lot about the human brain and data structure for fast MIPS in my conversations with ChatGPT.)\\nTypes of Memory#\\nMemory can be defined as the processes used to acquire, store, retain, and later retrieve information. There are several types of memory in human brains.\\n\\n\\nSensory Memory: This is the earliest stage of memory, providing the ability to retain impressions of sensory information (visual, auditory, etc) after the original stimuli have ended. Sensory memory typically only lasts for up to a few seconds. Subcategories include iconic memory (visual), echoic memory (auditory), and haptic memory (touch).\\n\\n\\nShort-Term Memory (STM) or Working Memory: It stores information that we are currently aware of and needed to carry out complex cognitive tasks such as learning and reasoning. Short-term memory is believed to have the capacity of about 7 items (Miller 1956) and lasts for 20-30 seconds.\\n\\n\\nLong-Term Memory (LTM): Long-term memory can store information for a remarkably long time, ranging from a few days to decades, with an essentially unlimited storage capacity. There are two subtypes of LTM:\\n\\nExplicit / declarative memory: This is memory of facts and events, and refers to those memories that can be consciously recalled, including episodic memory (events and experiences) and semantic memory (facts and concepts).\\nImplicit / procedural memory: This type of memory is unconscious and involves skills and routines that are performed automatically, like riding a bike or typing on a keyboard.\\n\\n\\n\\n\\nFig. 8. 
Categorization of human memory.\\nWe can roughly consider the following mappings:\\n\\nSensory memory as learning embedding representations for raw inputs, including text, image or other modalities;\\nShort-term memory as in-context learning. It is short and finite, as it is restricted by the finite context window length of Transformer.\\nLong-term memory as the external vector store that the agent can attend to at query time, accessible via fast retrieval.\\n\\nMaximum Inner Product Search (MIPS)#\\nThe external memory can alleviate the restriction of finite attention span.  A standard practice is to save the embedding representation of information into a vector store database that can support fast maximum inner-product search (MIPS). To optimize the retrieval speed, the common choice is the approximate nearest neighbors (ANN)\\u200b algorithm to return approximately top k nearest neighbors to trade off a little accuracy lost for a huge speedup.\\nA couple common choices of ANN algorithms for fast MIPS:\\n\\nLSH (Locality-Sensitive Hashing): It introduces a hashing function such that similar input items are mapped to the same buckets with high probability, where the number of buckets is much smaller than the number of inputs.\\nANNOY (Approximate Nearest Neighbors Oh Yeah): The core data structure are random projection trees, a set of binary trees where each non-leaf node represents a hyperplane splitting the input space into half and each leaf stores one data point. Trees are built independently and at random, so to some extent, it mimics a hashing function. ANNOY search happens in all the trees to iteratively search through the half that is closest to the query and then aggregates the results. The idea is quite related to KD tree but a lot more scalable.\\nHNSW (Hierarchical Navigable Small World): It is inspired by the idea of small world networks where most nodes can be reached by any other nodes within a small number of steps; e.g. 
“six degrees of separation” feature of social networks. HNSW builds hierarchical layers of these small-world graphs, where the bottom layers contain the actual data points. The layers in the middle create shortcuts to speed up search. When performing a search, HNSW starts from a random node in the top layer and navigates towards the target. When it can’t get any closer, it moves down to the next layer, until it reaches the bottom layer. Each move in the upper layers can potentially cover a large distance in the data space, and each move in the lower layers refines the search quality.\\nFAISS (Facebook AI Similarity Search): It operates on the assumption that in high dimensional space, distances between nodes follow a Gaussian distribution and thus there should exist clustering of data points. FAISS applies vector quantization by partitioning the vector space into clusters and then refining the quantization within clusters. Search first looks for cluster candidates with coarse quantization and then further looks into each cluster with finer quantization.\\nScaNN (Scalable Nearest Neighbors): The main innovation in ScaNN is anisotropic vector quantization. It quantizes a data point $x_i$ to $\\\\tilde{x}_i$ such that the inner product $\\\\langle q, x_i \\\\rangle$ is as similar to the original distance of $\\\\langle q, \\\\tilde{x}_i \\\\rangle$ as possible, instead of picking the closest quantization centroid points.\\n\\nFig. 9. Comparison of MIPS algorithms, measured in recall@10. (Image source: Google Blog, 2020)\\nCheck more MIPS algorithms and performance comparison in ann-benchmarks.com.\\nComponent Three: Tool Use#\\nTool use is a remarkable and distinguishing characteristic of human beings. We create, modify and utilize external objects to do things that go beyond our physical and cognitive limits. Equipping LLMs with external tools can significantly extend the model capabilities.\\n\\nFig. 10. 
A picture of a sea otter using rock to crack open a seashell, while floating in the water. While some other animals can use tools, the complexity is not comparable with humans. (Image source: Animals using tools)\\nMRKL (Karpas et al. 2022), short for “Modular Reasoning, Knowledge and Language”, is a neuro-symbolic architecture for autonomous agents. A MRKL system is proposed to contain a collection of “expert” modules and the general-purpose LLM works as a router to route inquiries to the best suitable expert module. These modules can be neural (e.g. deep learning models) or symbolic (e.g. math calculator, currency converter, weather API).\\nThey did an experiment on fine-tuning LLM to call a calculator, using arithmetic as a test case. Their experiments showed that it was harder to solve verbal math problems than explicitly stated math problems because LLMs (7B Jurassic1-large model) failed to extract the right arguments for the basic arithmetic reliably. The results highlight when the external symbolic tools can work reliably, knowing when to and how to use the tools are crucial, determined by the LLM capability.\\nBoth TALM (Tool Augmented Language Models; Parisi et al. 2022) and Toolformer (Schick et al. 2023) fine-tune a LM to learn to use external tool APIs. The dataset is expanded based on whether a newly added API call annotation can improve the quality of model outputs. See more details in the “External APIs” section of Prompt Engineering.\\nChatGPT Plugins and OpenAI API  function calling are good examples of LLMs augmented with tool use capability working in practice. The collection of tool APIs can be provided by other developers (as in Plugins) or self-defined (as in function calls).\\nHuggingGPT (Shen et al. 2023) is a framework to use ChatGPT as the task planner to select models available in HuggingFace platform according to the model descriptions and summarize the response based on the execution results.\\n\\nFig. 11. 
Illustration of how HuggingGPT works. (Image source: Shen et al. 2023)\\nThe system comprises of 4 stages:\\n(1) Task planning: LLM works as the brain and parses the user requests into multiple tasks. There are four attributes associated with each task: task type, ID, dependencies, and arguments. They use few-shot examples to guide LLM to do task parsing and planning.\\nInstruction:\\n\\nThe AI assistant can parse user input to several tasks: [{\"task\": task, \"id\", task_id, \"dep\": dependency_task_ids, \"args\": {\"text\": text, \"image\": URL, \"audio\": URL, \"video\": URL}}]. The \"dep\" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag \"-task_id\" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can\\'t be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning.\\n\\n(2) Model selection: LLM distributes the tasks to expert models, where the request is framed as a multiple-choice question. LLM is presented with a list of models to choose from. Due to the limited context length, task type based filtration is needed.\\nInstruction:\\n\\nGiven the user request and the call command, the AI assistant helps the user to select a suitable model from a list of models to process the user request. The AI assistant merely outputs the model id of the most appropriate model. The output must be in a strict JSON format: \"id\": \"id\", \"reason\": \"your detail reason for the choice\". We have a list of models for you to choose from {{ Candidate Models }}. 
Please select one model from the list.\\n\\n(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user\\'s request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path.\\n\\n(4) Response generation: LLM receives the execution results and provides summarized results to users.\\nTo put HuggingGPT into real world usage, a couple challenges need to solve: (1) Efficiency improvement is needed as both LLM inference rounds and interactions with other models slow down the process; (2) It relies on a long context window to communicate over complicated task content; (3) Stability improvement of LLM outputs and external model services.\\nAPI-Bank (Li et al. 2023) is a benchmark for evaluating the performance of tool-augmented LLMs. It contains 53 commonly used API tools, a complete tool-augmented LLM workflow, and 264 annotated dialogues that involve 568 API calls. The selection of APIs is quite diverse, including search engines, calculator, calendar queries, smart home control, schedule management, health data management, account authentication workflow and more. Because there are a large number of APIs, LLM first has access to API search engine to find the right API to call and then uses the corresponding documentation to make a call.\\n\\nFig. 12. Pseudo code of how LLM makes an API call in API-Bank. (Image source: Li et al. 2023)\\nIn the API-Bank workflow, LLMs need to make a couple of decisions and at each step we can evaluate how accurate that decision is. 
Decisions include:\\n\\nWhether an API call is needed.\\nIdentify the right API to call: if not good enough, LLMs need to iteratively modify the API inputs (e.g. deciding search keywords for Search Engine API).\\nResponse based on the API results: the model can choose to refine and call again if results are not satisfied.\\n\\nThis benchmark evaluates the agent’s tool use capabilities at three levels:\\n\\nLevel-1 evaluates the ability to call the API. Given an API’s description, the model needs to determine whether to call a given API, call it correctly, and respond properly to API returns.\\nLevel-2 examines the ability to retrieve the API. The model needs to search for possible APIs that may solve the user’s requirement and learn how to use them by reading documentation.\\nLevel-3 assesses the ability to plan API beyond retrieve and call. Given unclear user requests (e.g. schedule group meetings, book flight/hotel/restaurant for a trip), the model may have to conduct multiple API calls to solve it.\\n\\nCase Studies#\\nScientific Discovery Agent#\\nChemCrow (Bran et al. 2023) is a domain-specific example in which LLM is augmented with 13 expert-designed tools to accomplish tasks across organic synthesis, drug discovery, and materials design. The workflow, implemented in LangChain, reflects what was previously described in the ReAct and MRKLs and combines CoT reasoning with tools relevant to the tasks:\\n\\nThe LLM is provided with a list of tool names, descriptions of their utility, and details about the expected input/output.\\nIt is then instructed to answer a user-given prompt using the tools provided when necessary. 
The instruction suggests the model to follow the ReAct format - Thought, Action, Action Input, Observation.\\n\\nOne interesting observation is that while the LLM-based evaluation concluded that GPT-4 and ChemCrow perform nearly equivalently, human evaluations with experts oriented towards the completion and chemical correctness of the solutions showed that ChemCrow outperforms GPT-4 by a large margin. This indicates a potential problem with using LLM to evaluate its own performance on domains that requires deep expertise. The lack of expertise may cause LLMs not knowing its flaws and thus cannot well judge the correctness of task results.\\nBoiko et al. (2023) also looked into LLM-empowered agents for scientific discovery, to handle autonomous design, planning, and performance of complex scientific experiments. This agent can use tools to browse the Internet, read documentation, execute code, call robotics experimentation APIs and leverage other LLMs.\\nFor example, when requested to \"develop a novel anticancer drug\", the model came up with the following reasoning steps:\\n\\ninquired about current trends in anticancer drug discovery;\\nselected a target;\\nrequested a scaffold targeting these compounds;\\nOnce the compound was identified, the model attempted its synthesis.\\n\\nThey also discussed the risks, especially with illicit drugs and bioweapons. They developed a test set containing a list of known chemical weapon agents and asked the agent to synthesize them. 4 out of 11 requests (36%) were accepted to obtain a synthesis solution and the agent attempted to consult documentation to execute the procedure. 7 out of 11 were rejected and among these 7 rejected cases, 5 happened after a Web search while 2 were rejected based on prompt only.\\nGenerative Agents Simulation#\\nGenerative Agents (Park, et al. 
2023) is super fun experiment where 25 virtual characters, each controlled by a LLM-powered agent, are living and interacting in a sandbox environment, inspired by The Sims. Generative agents create believable simulacra of human behavior for interactive applications.\\nThe design of generative agents combines LLM with memory, planning and reflection mechanisms to enable agents to behave conditioned on past experience, as well as to interact with other agents.\\n\\nMemory stream: is a long-term memory module (external database) that records a comprehensive list of agents’ experience in natural language.\\n\\nEach element is an observation, an event directly provided by the agent.\\n- Inter-agent communication can trigger new natural language statements.\\n\\n\\nRetrieval model: surfaces the context to inform the agent’s behavior, according to relevance, recency and importance.\\n\\nRecency: recent events have higher scores\\nImportance: distinguish mundane from core memories. Ask LM directly.\\nRelevance: based on how related it is to the current situation / query.\\n\\n\\nReflection mechanism: synthesizes memories into higher level inferences over time and guides the agent’s future behavior. They are higher-level summaries of past events (<- note that this is a bit different from self-reflection above)\\n\\nPrompt LM with 100 most recent observations and to generate 3 most salient high-level questions given a set of observations/statements. Then ask LM to answer those questions.\\n\\n\\nPlanning & Reacting: translate the reflections and the environment information into actions\\n\\nPlanning is essentially in order to optimize believability at the moment vs in time.\\nPrompt template: {Intro of an agent X}. Here is X\\'s plan today in broad strokes: 1)\\nRelationships between agents and observations of one agent by another are all taken into consideration for planning and reacting.\\nEnvironment information is present in a tree structure.\\n\\n\\n\\n\\nFig. 13. 
The generative agent architecture. (Image source: Park et al. 2023)\\nThis fun simulation results in emergent social behavior, such as information diffusion, relationship memory (e.g. two agents continuing the conversation topic) and coordination of social events (e.g. host a party and invite many others).\\nProof-of-Concept Examples#\\nAutoGPT has drawn a lot of attention into the possibility of setting up autonomous agents with LLM as the main controller. It has quite a lot of reliability issues given the natural language interface, but nevertheless a cool proof-of-concept demo. A lot of code in AutoGPT is about format parsing.\\nHere is the system message used by AutoGPT, where {{...}} are user inputs:\\nYou are {{ai-name}}, {{user-provided AI bot description}}.\\nYour decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.\\n\\nGOALS:\\n\\n1. {{user-provided goal 1}}\\n2. {{user-provided goal 2}}\\n3. ...\\n4. ...\\n5. ...\\n\\nConstraints:\\n1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.\\n2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.\\n3. No user assistance\\n4. Exclusively use the commands listed in double quotes e.g. \"command name\"\\n5. Use subprocesses for commands that will not terminate within a few minutes\\n\\nCommands:\\n1. Google Search: \"google\", args: \"input\": \"<search>\"\\n2. Browse Website: \"browse_website\", args: \"url\": \"<url>\", \"question\": \"<what_you_want_to_find_on_website>\"\\n3. Start GPT Agent: \"start_agent\", args: \"name\": \"<name>\", \"task\": \"<short_task_desc>\", \"prompt\": \"<prompt>\"\\n4. Message GPT Agent: \"message_agent\", args: \"key\": \"<key>\", \"message\": \"<message>\"\\n5. List GPT Agents: \"list_agents\", args:\\n6. 
Delete GPT Agent: \"delete_agent\", args: \"key\": \"<key>\"\\n7. Clone Repository: \"clone_repository\", args: \"repository_url\": \"<url>\", \"clone_path\": \"<directory>\"\\n8. Write to file: \"write_to_file\", args: \"file\": \"<file>\", \"text\": \"<text>\"\\n9. Read file: \"read_file\", args: \"file\": \"<file>\"\\n10. Append to file: \"append_to_file\", args: \"file\": \"<file>\", \"text\": \"<text>\"\\n11. Delete file: \"delete_file\", args: \"file\": \"<file>\"\\n12. Search Files: \"search_files\", args: \"directory\": \"<directory>\"\\n13. Analyze Code: \"analyze_code\", args: \"code\": \"<full_code_string>\"\\n14. Get Improved Code: \"improve_code\", args: \"suggestions\": \"<list_of_suggestions>\", \"code\": \"<full_code_string>\"\\n15. Write Tests: \"write_tests\", args: \"code\": \"<full_code_string>\", \"focus\": \"<list_of_focus_areas>\"\\n16. Execute Python File: \"execute_python_file\", args: \"file\": \"<file>\"\\n17. Generate Image: \"generate_image\", args: \"prompt\": \"<prompt>\"\\n18. Send Tweet: \"send_tweet\", args: \"text\": \"<text>\"\\n19. Do Nothing: \"do_nothing\", args:\\n20. Task Complete (Shutdown): \"task_complete\", args: \"reason\": \"<reason>\"\\n\\nResources:\\n1. Internet access for searches and information gathering.\\n2. Long Term memory management.\\n3. GPT-3.5 powered Agents for delegation of simple tasks.\\n4. File output.\\n\\nPerformance Evaluation:\\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n2. Constructively self-criticize your big-picture behavior constantly.\\n3. Reflect on past decisions and strategies to refine your approach.\\n4. Every command has a cost, so be smart and efficient. 
Aim to complete tasks in the least number of steps.\\n\\nYou should only respond in JSON format as described below\\nResponse Format:\\n{\\n    \"thoughts\": {\\n        \"text\": \"thought\",\\n        \"reasoning\": \"reasoning\",\\n        \"plan\": \"- short bulleted\\\\n- list that conveys\\\\n- long-term plan\",\\n        \"criticism\": \"constructive self-criticism\",\\n        \"speak\": \"thoughts summary to say to user\"\\n    },\\n    \"command\": {\\n        \"name\": \"command name\",\\n        \"args\": {\\n            \"arg name\": \"value\"\\n        }\\n    }\\n}\\nEnsure the response can be parsed by Python json.loads\\nGPT-Engineer is another project to create a whole repository of code given a task specified in natural language. The GPT-Engineer is instructed to think over a list of smaller components to build and ask for user input to clarify questions as needed.\\nHere are a sample conversation for task clarification sent to OpenAI ChatCompletion endpoint used by GPT-Engineer. The user inputs are wrapped in {{user input text}}.\\n[\\n  {\\n    \"role\": \"system\",\\n    \"content\": \"You will read instructions and not carry them out, only seek to clarify them.\\\\nSpecifically you will first summarise a list of super short bullets of areas that need clarification.\\\\nThen you will pick one clarifying question, and wait for an answer from the user.\\\\n\"\\n  },\\n  {\\n    \"role\": \"user\",\\n    \"content\": \"We are writing {{a Super Mario game in python. MVC components split in separate files. Keyboard control.}}\\\\n\"\\n  },\\n  {\\n    \"role\": \"assistant\",\\n    \"content\": \"Summary of areas that need clarification:\\\\n1. Specifics of the Super Mario game (e.g. level design, characters, gameplay mechanics)\\\\n2. Details about the MVC components (e.g. which components are in each file)\\\\n3. Keyboard control implementation (e.g. 
which keys to use, how to handle input)\\\\n\\\\nClarifying question:\\\\nCan you provide more details about the Super Mario game, such as level design, characters, and gameplay mechanics?\"\\n  },\\n  {\\n    \"role\": \"user\",\\n    \"content\": \"{{There are 10 levels in total. The main character is a plumber named Mario, who can walk and jump. It is a classical platform game just like Super Mario. The main character moves from left to right, trying to get to the destination, where there are many obstacles and attacks from enemies in the process.}}\\\\n\\\\nIs anything else unclear? If yes, only answer in the form:\\\\n{remaining unclear areas} remaining questions.\\\\n{Next question}\\\\nIf everything is sufficiently clear, only answer \\\\\"Nothing more to clarify.\\\\\".\"\\n  },\\n  {\\n    \"role\": \"assistant\",\\n    \"content\": \"Remaining unclear areas: 2 remaining questions.\\\\nCan you provide more information about how the MVC components are split into separate files?\"\\n  },\\n  {\\n    \"role\": \"user\",\\n    \"content\": \"{{Make your own assumptions and state them explicitly before starting}}\"\\n  }\\n]\\nThen after these clarification, the agent moved into the code writing mode with a different system message.\\nSystem message:\\n\\nYou will get instructions for code to write.\\nYou will write a very long answer. 
Make sure that every detail of the architecture is, in the end, implemented as code.\\nMake sure that every detail of the architecture is, in the end, implemented as code.\\nThink step by step and reason yourself to the right decisions to make sure we get it right.\\nYou will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.\\nThen you will output the content of each file including ALL code.\\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\\nFILENAME is the lowercase file name including the file extension,\\nLANG is the markup code block language for the code’s language, and CODE is the code:\\nFILENAME\\nCODE\\nYou will start with the “entrypoint” file, then go to the ones that are imported by that file, and so on.\\nPlease note that the code should be fully functional. No placeholders.\\nFollow a language and framework appropriate best practice file naming convention.\\nMake sure that files contain all imports, types etc. 
Make sure that code in different files are compatible with each other.\\nEnsure to implement all code, if you are unsure, write a plausible implementation.\\nInclude module dependency or package manager dependency definition file.\\nBefore you finish, double check that all parts of the architecture is present in the files.\\nUseful to know:\\nYou almost always put different classes in different files.\\nFor Python, you always create an appropriate requirements.txt file.\\nFor NodeJS, you always create an appropriate package.json file.\\nYou always add a comment briefly describing the purpose of the function definition.\\nYou try to add comments explaining very complex bits of logic.\\nYou always follow the best practices for the requested languages in terms of describing the code written as a defined\\npackage/project.\\nPython toolbelt preferences:\\n\\npytest\\ndataclasses\\n\\n\\nConversatin samples:\\n[\\n  {\\n    \"role\": \"system\",\\n    \"content\": \"You will get instructions for code to write.\\\\nYou will write a very long answer. 
Make sure that every detail of the architecture is, in the end, implemented as code.\\\\nMake sure that every detail of the architecture is, in the end, implemented as code.\\\\n\\\\nThink step by step and reason yourself to the right decisions to make sure we get it right.\\\\nYou will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.\\\\n\\\\nThen you will output the content of each file including ALL code.\\\\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\\\\nFILENAME is the lowercase file name including the file extension,\\\\nLANG is the markup code block language for the code\\'s language, and CODE is the code:\\\\n\\\\nFILENAME\\\\n```LANG\\\\nCODE\\\\n```\\\\n\\\\nYou will start with the \\\\\"entrypoint\\\\\" file, then go to the ones that are imported by that file, and so on.\\\\nPlease note that the code should be fully functional. No placeholders.\\\\n\\\\nFollow a language and framework appropriate best practice file naming convention.\\\\nMake sure that files contain all imports, types etc. 
Make sure that code in different files are compatible with each other.\\\\nEnsure to implement all code, if you are unsure, write a plausible implementation.\\\\nInclude module dependency or package manager dependency definition file.\\\\nBefore you finish, double check that all parts of the architecture is present in the files.\\\\n\\\\nUseful to know:\\\\nYou almost always put different classes in different files.\\\\nFor Python, you always create an appropriate requirements.txt file.\\\\nFor NodeJS, you always create an appropriate package.json file.\\\\nYou always add a comment briefly describing the purpose of the function definition.\\\\nYou try to add comments explaining very complex bits of logic.\\\\nYou always follow the best practices for the requested languages in terms of describing the code written as a defined\\\\npackage/project.\\\\n\\\\n\\\\nPython toolbelt preferences:\\\\n- pytest\\\\n- dataclasses\\\\n\"\\n  },\\n #  … same conversation as earlier, ended with \"Make your own assumptions and state them explicitly before starting\".\\n  {\\n    \"role\": \"assistant\",\\n    \"content\": \"Assumptions:\\\\n1. Model: The model will contain the game\\'s data, such as level information, character states, and enemy positions.\\\\n2. View: The view will handle the game\\'s visuals, including rendering the game objects, backgrounds, and updating the display.\\\\n3. 
Controller: The controller will manage user input, such as keyboard controls, and update the model accordingly.\\\\n\\\\nWith these assumptions in mind, is there any specific aspect of the keyboard control implementation you would like me to clarify?\"\\n  },\\n  {\\n    \"role\": \"user\",\\n    \"content\": \"Please now remember the steps:\\\\n\\\\nThink step by step and reason yourself to the right decisions to make sure we get it right.\\\\nFirst lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose.\\\\n\\\\nThen you will output the content of each file including ALL code.\\\\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\\\\nFILENAME is the lowercase file name including the file extension,\\\\nLANG is the markup code block language for the code\\'s language, and CODE is the code:\\\\n\\\\nFILENAME\\\\n```LANG\\\\nCODE\\\\n```\\\\n\\\\nPlease note that the code should be fully functional. No placeholders.\\\\n\\\\nYou will start with the \\\\\"entrypoint\\\\\" file, then go to the ones that are imported by that file, and so on.\\\\nFollow a language and framework appropriate best practice file naming convention.\\\\nMake sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files are compatible with each other.\\\\nBefore you finish, double check that all parts of the architecture is present in the files.\\\\n\"\\n  }\\n]\\nChallenges#\\nAfter going through key ideas and demos of building LLM-centered agents, I start to see a couple common limitations:\\n\\n\\nFinite context length: The restricted context capacity limits the inclusion of historical information, detailed instructions, API call context, and responses. 
The design of the system has to work with this limited communication bandwidth, while mechanisms like self-reflection to learn from past mistakes would benefit a lot from long or infinite context windows. Although vector stores and retrieval can provide access to a larger knowledge pool, their representation power is not as powerful as full attention.\\n\\n\\nChallenges in long-term planning and task decomposition: Planning over a lengthy history and effectively exploring the solution space remain challenging. LLMs struggle to adjust plans when faced with unexpected errors, making them less robust compared to humans who learn from trial and error.\\n\\n\\nReliability of natural language interface: Current agent system relies on natural language as an interface between LLMs and external components such as memory and tools. However, the reliability of model outputs is questionable, as LLMs may make formatting errors and occasionally exhibit rebellious behavior (e.g. refuse to follow an instruction). Consequently, much of the agent demo code focuses on parsing model output.\\n\\n\\nCitation#\\nCited as:\\n\\nWeng, Lilian. (Jun 2023). “LLM-powered Autonomous Agents”. Lil’Log. https://lilianweng.github.io/posts/2023-06-23-agent/.\\n\\nOr\\n@article{weng2023agent,\\n  title   = \"LLM-powered Autonomous Agents\",\\n  author  = \"Weng, Lilian\",\\n  journal = \"lilianweng.github.io\",\\n  year    = \"2023\",\\n  month   = \"Jun\",\\n  url     = \"https://lilianweng.github.io/posts/2023-06-23-agent/\"\\n}\\nReferences#\\n[1] Wei et al. “Chain of thought prompting elicits reasoning in large language models.” NeurIPS 2022\\n[2] Yao et al. “Tree of Thoughts: Dliberate Problem Solving with Large Language Models.” arXiv preprint arXiv:2305.10601 (2023).\\n[3] Liu et al. “Chain of Hindsight Aligns Language Models with Feedback\\n“ arXiv preprint arXiv:2302.02676 (2023).\\n[4] Liu et al. 
“LLM+P: Empowering Large Language Models with Optimal Planning Proficiency” arXiv preprint arXiv:2304.11477 (2023).\\n[5] Yao et al. “ReAct: Synergizing reasoning and acting in language models.” ICLR 2023.\\n[6] Google Blog. “Announcing ScaNN: Efficient Vector Similarity Search” July 28, 2020.\\n[7] https://chat.openai.com/share/46ff149e-a4c7-4dd7-a800-fc4a642ea389\\n[8] Shinn & Labash. “Reflexion: an autonomous agent with dynamic memory and self-reflection” arXiv preprint arXiv:2303.11366 (2023).\\n[9] Laskin et al. “In-context Reinforcement Learning with Algorithm Distillation” ICLR 2023.\\n[10] Karpas et al. “MRKL Systems A modular, neuro-symbolic architecture that combines large language models, external knowledge sources and discrete reasoning.” arXiv preprint arXiv:2205.00445 (2022).\\n[11] Nakano et al. “Webgpt: Browser-assisted question-answering with human feedback.” arXiv preprint arXiv:2112.09332 (2021).\\n[12] Parisi et al. “TALM: Tool Augmented Language Models”\\n[13] Schick et al. “Toolformer: Language Models Can Teach Themselves to Use Tools.” arXiv preprint arXiv:2302.04761 (2023).\\n[14] Weaviate Blog. Why is Vector Search so fast? Sep 13, 2022.\\n[15] Li et al. “API-Bank: A Benchmark for Tool-Augmented LLMs” arXiv preprint arXiv:2304.08244 (2023).\\n[16] Shen et al. “HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in HuggingFace” arXiv preprint arXiv:2303.17580 (2023).\\n[17] Bran et al. “ChemCrow: Augmenting large-language models with chemistry tools.” arXiv preprint arXiv:2304.05376 (2023).\\n[18] Boiko et al. “Emergent autonomous scientific research capabilities of large language models.” arXiv preprint arXiv:2304.05332 (2023).\\n[19] Joon Sung Park, et al. “Generative Agents: Interactive Simulacra of Human Behavior.” arXiv preprint arXiv:2304.03442 (2023).\\n[20] AutoGPT. https://github.com/Significant-Gravitas/Auto-GPT\\n[21] GPT-Engineer. 
https://github.com/AntonOsika/gpt-engineer\\n\\n\\n\\nnlp\\nlanguage-model\\nagent\\nsteerability\\nprompting\\n\\n\\n\\n« \\n\\nAdversarial Attacks on LLMs\\n\\n\\n »\\n\\nPrompt Engineering\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n© 2024 Lil\\'Log\\n\\n        Powered by\\n        Hugo &\\n        PaperMod\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n' metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en'}\n",
      "Document 1[1]:\n",
      "['Config', '__abstractmethods__', '__annotations__', '__class__', '__class_vars__', '__config__', '__custom_root_type__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__exclude_fields__', '__fields__', '__fields_set__', '__format__', '__ge__', '__get_validators__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__include_fields__', '__init__', '__init_subclass__', '__iter__', '__json_encoder__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__post_root_validators__', '__pre_root_validators__', '__pretty__', '__private_attributes__', '__reduce__', '__reduce_ex__', '__repr__', '__repr_args__', '__repr_name__', '__repr_str__', '__rich_repr__', '__schema_cache__', '__setattr__', '__setstate__', '__signature__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__try_update_forward_refs__', '__validators__', '_abc_impl', '_calculate_keys', '_copy_and_set_values', '_decompose_class', '_enforce_dict_if_root', '_get_value', '_init_private_attributes', '_iter', 'construct', 'copy', 'dict', 'from_orm', 'get_lc_namespace', 'is_lc_serializable', 'json', 'lc_attributes', 'lc_id', 'lc_secrets', 'metadata', 'page_content', 'parse_file', 'parse_obj', 'parse_raw', 'schema', 'schema_json', 'to_json', 'to_json_not_implemented', 'type', 'update_forward_refs', 'validate']\n",
      "page_content='\\n\\n\\n\\n\\n\\nPrompt Engineering | Lil\\'Log\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLil\\'Log\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nPosts\\n\\n\\n\\n\\nArchive\\n\\n\\n\\n\\nSearch\\n\\n\\n\\n\\nTags\\n\\n\\n\\n\\nFAQ\\n\\n\\n\\n\\nemojisearch.app\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n      Prompt Engineering\\n    \\nDate: March 15, 2023  |  Estimated Reading Time: 21 min  |  Author: Lilian Weng\\n\\n\\n \\n\\n\\nTable of Contents\\n\\n\\n\\nBasic Prompting\\n\\nZero-Shot\\n\\nFew-shot\\n\\nTips for Example Selection\\n\\nTips for Example Ordering\\n\\n\\n\\nInstruction Prompting\\n\\nSelf-Consistency Sampling\\n\\nChain-of-Thought (CoT)\\n\\nTypes of CoT prompts\\n\\nTips and Extensions\\n\\n\\nAutomatic Prompt Design\\n\\nAugmented Language Models\\n\\nRetrieval\\n\\nProgramming Language\\n\\nExternal APIs\\n\\n\\nCitation\\n\\nUseful Resources\\n\\nReferences\\n\\n\\n\\n\\n\\nPrompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the model weights. It is an empirical science and the effect of prompt engineering methods can vary a lot among models, thus requiring heavy experimentation and heuristics.\\nThis post only focuses on prompt engineering for autoregressive language models, so nothing with Cloze tests, image generation or multimodality models. At its core, the goal of prompt engineering is about alignment and model steerability. Check my previous post on controllable text generation.\\n[My personal spicy take] In my opinion, some prompt engineering papers are not worthy 8 pages long, since those tricks can be explained in one or a few sentences and the rest is all about benchmarking. An easy-to-use and shared benchmark infrastructure should be more beneficial to the community. 
Iterative prompting or external tool use would not be trivial to set up. Also non-trivial to align the whole research community to adopt it.\\nBasic Prompting#\\nZero-shot and few-shot learning are two most basic approaches for prompting the model, pioneered by many LLM papers and commonly used for benchmarking LLM performance.\\nZero-Shot#\\nZero-shot learning is to simply feed the task text to the model and ask for results.\\n(All the sentiment analysis examples are from SST-2)\\nText: i\\'ll bet the video game is a lot more fun than the film.\\nSentiment:\\nFew-shot#\\nFew-shot learning presents a set of high-quality demonstrations, each consisting of both input and desired output, on the target task. As the model first sees good examples, it can better understand human intention and criteria for what kinds of answers are wanted. Therefore, few-shot learning often leads to better performance than zero-shot. However, it comes at the cost of more token consumption and may hit the context length limit when input and output text are long.\\nText: (lawrence bounces) all over the stage, dancing, running, sweating, mopping his face and generally displaying the wacky talent that brought him fame in the first place.\\nSentiment: positive\\n\\nText: despite all evidence to the contrary, this clunker has somehow managed to pose as an actual feature movie, the kind that charges full admission and gets hyped on tv and purports to amuse small children and ostensible adults.\\nSentiment: negative\\n\\nText: for the first time in years, de niro digs deep emotionally, perhaps because he\\'s been stirred by the powerful work of his co-stars.\\nSentiment: positive\\n\\nText: i\\'ll bet the video game is a lot more fun than the film.\\nSentiment:\\nMany studies looked into how to construct in-context examples to maximize the performance and observed that choice of prompt format, training examples, and the order of the examples can lead to dramatically different performance, from 
near random guess to near SoTA.\\nZhao et al. (2021) investigated the case of few-shot classification and proposed that several biases with LLM (they use GPT-3 in the experiments) contribute to such high variance: (1) Majority label bias exists if distribution of labels among the examples is unbalanced; (2) Recency bias refers to the tendency where the model may repeat the label at the end; (3) Common token bias indicates that LLM tends to produce common tokens more often than rare tokens. To conquer such bias, they proposed a method to calibrate the label probabilities output by the model to be uniform when the input string is N/A.\\nTips for Example Selection#\\n\\n\\nChoose examples that are semantically similar to the test example using $k$-NN clustering in the embedding space (Liu et al., 2021)\\n\\n\\nTo select a diverse and representative set of examples, Su et al. (2022) proposed to use a graph-based approach: (1) First, construct a directed graph $G=(V, E)$ based on the embedding (e.g. by SBERT or other embedding models) cosine similarity between samples, where each node points to its $k$ nearest neighbors; (2) Start with a set of selected samples $\\\\mathcal{L}=\\\\emptyset$ and a set of remaining samples $\\\\mathcal{U}$. Each sample $u \\\\in \\\\mathcal{U}$ is scored by $$\\n\\\\text{score}(u) = \\\\sum_{v \\\\in \\\\{v \\\\mid (u, v) \\\\in E, v\\\\in \\\\mathcal{U}\\\\}} s(v)\\\\quad\\\\text{where }s(v)=\\\\rho^{- \\\\vert \\\\{\\\\ell \\\\in \\\\mathcal{L} \\\\vert (v, \\\\ell)\\\\in E \\\\}\\\\vert},\\\\quad\\\\rho > 1\\n$$ such that $s(v)$ is low if many of $v$’s neighbors are selected and thus the scoring encourages to pick diverse samples.\\n\\n\\nRubin et al. (2022) proposed to train embeddings via contrastive learning specific to one training dataset for in-context learning sample selection.  
Given each training pair $(x, y)$, the quality of one example $e_i$ (formatted input-output pair) can be measured by a conditioned probability assigned by LM: $\\\\text{score}(e_i) = P_\\\\text{LM}(y \\\\mid e_i, x)$. We can identify other examples with top-$k$ and bottom-$k$ scores as positive and negative sets of candidates for every training pair and use that for contrastive learning.\\n\\n\\nSome researchers tried Q-Learning to do sample selection. (Zhang et al. 2022)\\n\\n\\nMotivated by uncertainty-based active learning, Diao et al. (2023) suggested to identify examples with high disagreement or entropy among multiple sampling trials. Then annotate these examples to be used in few-shot prompts.\\n\\n\\nTips for Example Ordering#\\n\\nA general suggestion is to keep the selection of examples diverse, relevant to the test sample and in random order to avoid majority label bias and recency bias.\\nIncreasing model sizes or including more training examples does not reduce variance among different permutations of in-context examples. Same order may work well for one model but badly for another. When the validation set is limited, consider choosing the order such that the model does not produce extremely unbalanced predictions or being overconfident about its predictions. (Lu et al. 2022)\\n\\nInstruction Prompting#\\nThe purpose of presenting few-shot examples in the prompt is to explain our intent to the model; in other words, describe the task instruction to the model in the form of demonstrations. However, few-shot can be expensive in terms of token usage and restricts the input length due to limited context length. So, why not just give the instruction directly?\\nInstructed LM (e.g. InstructGPT, natural instruction) finetunes a pretrained model with high-quality tuples of (task instruction, input, ground truth output) to make LM better understand user intention and follow instruction. 
RLHF (Reinforcement Learning from Human Feedback) is a common method to do so. The benefit of instruction following style fine-tuning improves the model to be more aligned with human intention and greatly reduces the cost of communication.\\nWhen interacting with instruction models, we should describe the task requirement in details, trying to be specific and precise and avoiding say “not do something” but rather specify what to do.\\nPlease label the sentiment towards the movie of the given movie review. The sentiment label should be \"positive\" or \"negative\". \\nText: i\\'ll bet the video game is a lot more fun than the film. \\nSentiment:\\nExplaining the desired audience is another smart way to give instructions\\n\\nFor example to produce education materials for kids,\\n\\nDescribe what is quantum physics to a 6-year-old.\\n\\nAnd safe content,\\n\\n... in language that is safe for work.\\nIn-context instruction learning (Ye et al. 2023) combines few-shot learning with instruction prompting. It incorporates multiple demonstration examples across different tasks in the prompt, each demonstration consisting of instruction, task input and output. Note that their experiments were only on classification tasks and the instruction prompt contains all label options.\\nDefinition: Determine the speaker of the dialogue, \"agent\" or \"customer\".\\nInput: I have successfully booked your tickets.\\nOuput: agent\\n\\nDefinition: Determine which category the question asks for, \"Quantity\" or \"Location\".\\nInput: What\\'s the oldest building in US?\\nOuput: Location\\n\\nDefinition: Classify the sentiment of the given movie review, \"positive\" or \"negative\".\\nInput: i\\'ll bet the video game is a lot more fun than the film.\\nOutput:\\nSelf-Consistency Sampling#\\nSelf-consistency sampling (Wang et al. 
2022a) is to sample multiple outputs with temperature > 0 and then selecting the best one out of these candidates.\\nThe criteria for selecting the best candidate can vary from task to task. A general solution is to pick majority vote. For tasks that are easy to validate such as a programming question with unit tests, we can simply run through the interpreter and verify the correctness with unit tests.\\nChain-of-Thought (CoT)#\\nChain-of-thought (CoT) prompting (Wei et al. 2022) generates a sequence of short sentences to describe reasoning logics step by step, known as reasoning chains or rationales, to eventually lead to the final answer. The benefit of CoT is more pronounced for complicated reasoning tasks, while using large models (e.g. with more than 50B parameters). Simple tasks only benefit slightly from CoT prompting.\\nTypes of CoT prompts#\\nTwo main types of CoT prompting:\\n\\nFew-shot CoT. It is to prompt the model with a few demonstrations, each containing manually written (or model-generated) high-quality reasoning chains.\\n\\n(All the math reasoning examples are from GSM8k)\\nQuestion: Tom and Elizabeth have a competition to climb a hill. Elizabeth takes 30 minutes to climb the hill. Tom takes four times as long as Elizabeth does to climb the hill. How many hours does it take Tom to climb up the hill?\\nAnswer: It takes Tom 30*4 = <<30*4=120>>120 minutes to climb the hill.\\nIt takes Tom 120/60 = <<120/60=2>>2 hours to climb the hill.\\nSo the answer is 2.\\n===\\nQuestion: Jack is a soccer player. He needs to buy two pairs of socks and a pair of soccer shoes. Each pair of socks cost $9.50, and the shoes cost $92. Jack has $40. 
How much more money does Jack need?\\nAnswer: The total cost of two pairs of socks is $9.50 x 2 = $<<9.5*2=19>>19.\\nThe total cost of the socks and the shoes is $19 + $92 = $<<19+92=111>>111.\\nJack need $111 - $40 = $<<111-40=71>>71 more.\\nSo the answer is 71.\\n===\\nQuestion: Marty has 100 centimeters of ribbon that he must cut into 4 equal parts. Each of the cut parts must be divided into 5 equal parts. How long will each final cut be?\\nAnswer:\\n\\nZero-shot CoT. Use natural language statement like Let\\'s think step by step to explicitly encourage the model to first generate reasoning chains and then to prompt with Therefore, the answer is to produce answers (Kojima et al. 2022 ). Or a similar statement Let\\'s work this out it a step by step to be sure we have the right answer (Zhou et al. 2022).\\n\\nQuestion: Marty has 100 centimeters of ribbon that he must cut into 4 equal parts. Each of the cut parts must be divided into 5 equal parts. How long will each final cut be?\\nAnswer: Let\\'s think step by step.\\nTips and Extensions#\\n\\n\\nSelf-consistency sampling can improve reasoning accuracy by sampling a number of diverse answers and then taking the majority vote. (Wang et al. 2022a)\\n\\n\\nAnother approach for ensemble learning is to alter the example order or use model generated rationales to replace human-written ones to introduce randomness during multiple sample trials. Then aggregate model outputs with a majority vote to get final answer. (Wang et al. 2022b)\\n\\n\\nIf training examples are only associated with true answers (easy to verify!) but no rationales, we can follow the STaR (Self-Taught Reasoner; Zelikman et al. 2022) method : (1) Ask LLM to generate reasoning chains and only keep those leading to correct answers; (2) Then fine-tune the model with generated rationales and repeat the process until convergence. Note that higher temperature is more likely to generate incorrect rationales with correct answers. 
If training examples do not have ground truth answers, maybe consider using majority votes as the “correct” answers.\\n\\n\\nPrompts with demonstrations of higher reasoning complexity can achieve better performance, where complexity is measured by the number of reasoning steps in the chains. When separating reasoning steps, newline \\\\n symbol works better than step i, period . or semicolon ;. (Fu et al. 2023)\\n\\n\\nComplexity-based consistency is to explicitly prefer complex chains among all the generations by taking majority vote among only top $k$ complex chains. (Fu et al. 2023)\\n\\n\\nLater, Shum et al. (2023) found that in their experiments CoT prompts with only complex examples can improve the accuracy of complex questions, but perform poorly in simple questions; evidence shown on GSM8k.\\n\\n\\nChanging Q: to Question: is found to be helpful. (Fu et al. 2023)\\n\\n\\nYe & Durrett (2022) found that the benefit of including explanations in the prompt is small to moderate for NLP tasks that involve reasoning over text (i.e. QA and NLI) and the effects vary by models. They observed that explanations are more likely to be nonfactual than be inconsistent (i.e. whether explanation entails prediction). Nonfactual explanations most likely lead to incorrect predictions.\\n\\n\\nSelf-Ask (Press et al. 2022) is a method to repeatedly prompt the model to ask following-up questions to construct the thought process iteratively. Follow-up questions can be answered by search engine results. Similarly, IRCoT (Interleaving Retrieval CoT; Trivedi et al. 2022) and ReAct (Reason + Act; Yao et al. 2023) combines iterative CoT prompting with queries to Wikipedia APIs to search for relevant entities and content and then add it back into the context.\\n\\n\\n\\nFig. 1. How Self-Ask works with external search queries.(Image source: Press et al. 2022).\\n\\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. 
It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, essentially creating a tree structure. The search process can be BFS or DFS while each state is evaluated by a classifier (via a prompt) or majority vote.\\n\\n\\nFig. 2. How Self-Ask works with external search queries.(Image source: Yao et al. 2022).\\nAutomatic Prompt Design#\\nPrompt is a sequence of prefix tokens that increase the probability of getting  desired output given input. Therefore we can treat them as trainable parameters and optimize them directly on the embedding space via gradient descent, such as AutoPrompt (Shin et al., 2020, Prefix-Tuning (Li & Liang (2021)), P-tuning (Liu et al. 2021) and Prompt-Tuning (Lester et al. 2021). This section in my “Controllable Neural Text Generation” post has a good coverage of them. The trend from AutoPrompt to Prompt-Tuning is that the setup gets gradually simplified.\\nAPE (Automatic Prompt Engineer; Zhou et al. 2022) is a method to search over a pool of model-generated instruction candidates and then filters the candidate set according to a chosen score function to ultimately choose the best candidate with highest score.\\n\\n\\nPrompt LLM to generate instruction candidates based on a small set of demonstrations in the form of input-output pairs. E.g. 
{{Given desired input-output pairs}}\\\\n\\\\nThe instruction is.\\n\\n\\nGiven a dataset of $\\\\mathcal{D}_\\\\text{train} = \\\\{(x, y)\\\\}$, we would like to find an instruction $\\\\rho$ such that $\\\\rho^* = \\\\arg\\\\max_\\\\rho \\\\mathbb{E}_{(x, y) \\\\in \\\\mathcal{D}_\\\\text{train}} [f(\\\\rho, x, y)]$, where $f(.)$ is a per-sample score function, such as execution accuracy $\\\\mathbb{1}[\\\\text{LM}(.\\\\vert \\\\rho, x)=y]$ or log probability: $p_\\\\text{LM}(y \\\\mid \\\\rho, x)$.\\n\\n\\nUse an iterative Monte Carlo search method to improve the best candidates by proposing semantically similar variants via prompts like Generate a variation of the following instruction while keeping the semantic meaning.\\\\n\\\\nInput: ...\\\\n\\\\nOutput:...\\n\\n\\nTo construct chain-of-thought prompts automatically, Shum et al. (2023) suggested augment-prune-select, a three-step process:\\n\\nAugment: Generate multiple pseudo-chains of thought given question using few-shot or zero-shot CoT prompts;\\nPrune: Prune pseudo chains based on whether generated answers match ground truths.\\nSelect: Apply a variance-reduced policy gradient strategy to learn the probability distribution over selected examples, while considering the probability distribution over examples as policy and the validation set accuracy as reward.\\n\\nZhang et al. (2023) instead adopted clustering techniques to sample questions and then generates chains. They observed that LLMs tend to make certain types of mistakes. One type of errors can be similar in the emebedding space and thus get grouped together. By only sampling one or a few from frequent-error clusters, we can prevent too many wrong demonstrations of one error type and collect a diverse set of examples.\\n\\nQuestion clustering: Embed questions and run $k$-means for clustering.\\nDemonstration selection: Select a set of representative questions from each cluster; i.e. one demonstration from one cluster. 
Samples in each cluster are sorted by distance to the cluster centroid and those closer to the centroid are selected first.\\nRationale generation: Use zero-shot CoT to generate reasoning chains for selected questions and construct few-shot prompt to run inference.\\n\\nAugmented Language Models#\\nA survey on augmented language models by Mialon et al. (2023) has great coverage over multiple categories of language models augmented with reasoning skills and the ability of using external tools. Recommend it.\\nRetrieval#\\nOften we need to complete tasks that require latest knowledge after the model pretraining time cutoff or internal/private knowledge base. In that case, the model would not know the context if we don’t explicitly provide it in the prompt. Many methods for Open Domain Question Answering depend on first doing retrieval over a knowledge base and then incorporating the retrieved content as part of the prompt. The accuracy of such a process depends on the quality of both retrieval and generation steps.\\nLazaridou et al. (2022) studied how to use Google Search for document retrieval to augment LLMs. Given a question $q$, clean text is extracted out of 20 URLs returned by Google, resulting in a set of documents. Because these documents are long, each document is split into paragraphs of 6 sentences, $\\\\{p\\\\}$. Paragraphs are ranked by TF-IDF based cosine similarity between evidence paragraphs and the query. Only the most relevant paragraph is used in the prompt to produce an answer $a$.\\nFor closed-book QA, each demonstration is formatted as follows to construct few-shot prompts. 
Swapping the question with the evidence (longer distance between questions and answers) is found to consistently yield lower results across all datasets.\\nEvidence: ...\\nQuestion: ...\\nAnswer: ...\\nThe answer probability is computed in three ways:\\n\\nRAG style, $p(a_i \\\\mid q) = \\\\sum_{i=1}^n p_\\\\text{tf-idf} (p_i \\\\mid q) \\\\cdot p_\\\\text{LM}(a_i \\\\mid q, p_i)$, where $p_\\\\text{tf-idf} (p_i \\\\mid q)$ is the normalized cosine similarities between the TF-IDF passage and question representations.\\nNoisy channel inference, $p(a_i\\\\mid q) = \\\\frac{p_\\\\text{LM}(q \\\\mid a_i, p_i) \\\\cdot p_\\\\text{LM}(a_i \\\\mid p_i)}{p_\\\\text{LM}(q \\\\mid p_i)}$\\nProduct-of-Experts (PoE), combines all probabilities used above in addition to $p_\\\\text{LM}(p_i \\\\mid q)$.\\n\\nAccording to their experiments on generation and classification tasks, among three answer reranking scores - PoE > Noisy channel > RAG. Among individual probabilities, $p_\\\\text{LM}(a \\\\mid q, p_i)$ and $p_\\\\text{LM}(q \\\\mid p_i, a)$ are found to be most informative. $p_\\\\text{LM}(q \\\\mid p_i, a)$ captures how well the question can be explained by LM given evidence paragraph and answer and can reliably be used for reranking answer candidates.\\nOne observation with SituatedQA dataset for questions grounded in different dates is that despite LM (pretraining cutoff is year 2020) has access to latest information via Google Search, its performance on post-2020 questions are still a lot worse than on pre-2020 questions. This suggests the existence of some discrepencies or conflicting parametric between contextual information and model internal knowledge.\\nInterestingly it is found to be beneficial even with only “internal retrieval”, that is, to generate knowledge about a topic before answering the question (Liu et al. 2022). First we can use  the following template to extract knowledge:\\nGenerate some knowledge about the input. 
Examples:\\n\\nInput: What type of water formation is formed by clouds?\\nKnowledge: Clouds are made of water vapor.\\n\\nInput: {question}\\nKnowledge:\\nAnd then with model-generated knowledge, prompt the LM further to get the answer.\\nProgramming Language#\\nBoth PAL (Program-aided language models); Gao et al. 2022) and PoT (Program of Thoughts prompting; Chen et al. 2022) ask LLM to generate programming language statements to resolve natural language reasoning problems, hence offloading the solution step to a runtime such as a Python interpreter. Such setup decouples complex computation and reasoning. It relies on a LM with good enough coding skills.\\n\\nFig. 3. Comparing CoT and PoT. (Image source: Chen et al. 2022).\\nExternal APIs#\\nTALM (Tool Augmented Language Models; Parisi et al. 2022) is a language model augmented with text-to-text API calls. LM is guided to generate |tool-call and tool input text conditioned on task input text to construct API call requests. When |result shows up, the specified tool API is called and the returned result gets appended to the text sequence. The final output is generated following |output token.\\n\\nFig. 4. The format of API calls in TALM. (Image source: Parisi et al. 2022).\\nTALM adopts a self-play approach to iteratively bootstrap the dataset of tool use examples and finetune LM with it. This self-play, defined as a model interacting with a tool API, iteratively expands the dataset based on whether a newly added tool API can improve the model outputs. Same idea is adopted in Toolformer too, described in more details below. The pipeline loosely mimics a RL process where LM is the policy network and it is trained by policy gradient with a binary reward signal.\\n\\nFig. 5. Self-play iterations help boost the model performance.(Image source: Parisi et al. 2022).\\nToolformer (Schick et al. 
2023) is a LM that can use external tools via simple APIs, which is built in a self-supervised manner and only requires a handful of demonstrations for each API. The toolbox of Toolformer includes:\\n\\nCalculator to help LM with the lack of precise math skills;\\nQ&A system to help with unfaithful content and hallucination;\\nSearch engine to provide up-to-date information after pretraining cut off time;\\nTranslation system to improve performance on low resource language;\\nCalendar to make LM be aware of time progression.\\n\\n\\nFig. 6. Illustration of how to build Toolformer.(Image source: Schick et al. 2023).\\nToolformer is trained as follows:\\n\\n\\nPrompting to annotate potential API calls. Ask a pre-trained LM to annotate a dataset via few-shot learning with API call usage examples. Formatting example:\\n\\nFig. 7. How dataset is annotated to do API calls.(Image source: Schick et al. 2023).\\n\\n\\nEach API call is represented as a tuple of (API name, corresponding input), $c=(a_c, i_c)$ and its corresponding result is denoted as $r$. 
The API call sequences with and without results are labeled as follows, respectively:\\n\\n  $$\\n  \\\\begin{aligned}\\n  e(c) &= \\\\langle\\\\texttt{API}\\\\rangle a_c(i_c) \\\\langle\\\\texttt{/API}\\\\rangle \\\\\\\\\\n  e(c, r) &= \\\\langle\\\\texttt{API}\\\\rangle a_c(i_c) \\\\to r \\\\langle\\\\texttt{/API}\\\\rangle\\n  \\\\end{aligned}\\n  $$\\n  \\n\\n\\nSample API calls based on the probabilities $p_\\\\text{LM}(\\\\langle\\\\texttt{API}\\\\rangle \\\\mid \\\\text{prompt}(\\\\mathbf{x}), \\\\mathbf{x}_{1:i})$ and select top $k$ candidate positions for doing API calls at position $i$ if the probability is larger than a threshold.\\n\\n\\nThen we sample potential API calls from the LM given the sequence $[\\\\text{prompt}(\\\\mathbf{x}), x_1, \\\\dots, x_{i-1}, \\\\langle\\\\texttt{API}\\\\rangle]$ as prefix and $\\\\langle\\\\texttt{/API}\\\\rangle$ as suffix.\\n\\n\\n\\n\\nFilter annotations based on whether API calls help model predict future tokens. Use a self-supervised loss to decide which API calls are actually helpful.\\n\\n\\nExecute each API call $c_i$ to get corresponding result $r_i$.\\n\\n\\nCompute weighted cross entropy loss for the LM over tokens $x_i, \\\\dots, x_n$ when the model is prefixed with the prompt. Two versions are computed, one with API result and the other with empty sequence $\\\\varepsilon$.\\n\\n  $$\\n  \\\\begin{aligned}\\n  L^+_i &= L_i(e(c_i, r_i)) \\\\\\\\\\n  L^-_i &= \\\\min(L_i(\\\\varepsilon), L_i(e(c_i, \\\\varepsilon))) \\\\\\\\\\n  \\\\end{aligned}\\n  $$\\n  \\nOnly API calls with $L^-_i - L^+_i$ larger than a threshold are kept, meaning that adding this API call and its results help the model predict future tokens.\\n\\n\\n\\n\\nFine-tune LM on this annotated dataset. The new training sequences are constructed as $\\\\mathbf{x}^* = x_{1:i-1}, e(c_i, r_i), x_{i:n}$ . The training data is a combination of the original dataset (e.g. 
a subset of CCNet, as in the paper) and its augmented version.\\n\\n\\nAt inference time, decoding runs until the model produces “$\\\\to$ \" token, indicating that it is expecting response from an API call next.\\nToolformer currently does not support tool use in a chain (i.e. using the output of one tool as an input for another tool) or in an interactive way (i.e. adopt API response after human selection). Both are interesting future directions to expand the model for.\\nCitation#\\nCited as:\\n\\nWeng, Lilian. (Mar 2023). Prompt Engineering. Lil’Log. https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/.\\n\\nOr\\n@article{weng2023prompt,\\n  title   = \"Prompt Engineering\",\\n  author  = \"Weng, Lilian\",\\n  journal = \"lilianweng.github.io\",\\n  year    = \"2023\",\\n  month   = \"Mar\",\\n  url     = \"https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/\"\\n}\\nUseful Resources#\\n\\nOpenAI Cookbook has many in-depth examples for how to utilize LLM efficiently.\\nLangChain, a library for combining language models with other components to build applications.\\nPrompt Engineering Guide repo contains a pretty comprehensive collection of education materials on prompt engineering.\\nlearnprompting.org\\nPromptPerfect\\nSemantic Kernel\\n\\nReferences#\\n[1] Zhao et al. “Calibrate Before Use: Improving Few-shot Performance of Language Models.” ICML 2021\\n[2] Liu et al. “What Makes Good In-Context Examples for GPT-3?” arXiv preprint arXiv:2101.06804 (2021).\\n[3] Lu et al. “Fantastically Ordered Prompts and Where to Find Them: Overcoming Few-Shot Prompt Order Sensitivity.” ACL 2022\\n[4] Ye et al. “In-Context Instruction Learning.” arXiv preprint arXiv:2302.14691 (2023).\\n[5] Su et al. “Selective annotation makes language models better few-shot learners.” arXiv preprint arXiv:2209.01975 (2022).\\n[6] Rubin et al. “Learning to retrieve prompts for in-context learning.” NAACL-HLT 2022\\n[7] Wei et al. 
“Chain of thought prompting elicits reasoning in large language models.” NeurIPS 2022\\n[8] Wang et al. “Self-Consistency Improves Chain of Thought Reasoning in Language Models.” ICLR 2023.\\n[9] Diao et al. “Active Prompting with Chain-of-Thought for Large Language Models.” arXiv preprint arXiv:2302.12246 (2023).\\n[10] Zelikman et al. “STaR: Bootstrapping Reasoning With Reasoning.” arXiv preprint arXiv:2203.14465 (2022).\\n[11] Ye & Durrett. “The unreliability of explanations in few-shot in-context learning.” arXiv preprint arXiv:2205.03401 (2022).\\n[12] Trivedi et al. “Interleaving retrieval with chain-of-thought reasoning for knowledge-intensive multi-step questions.” arXiv preprint arXiv:2212.10509 (2022).\\n[13] Press et al. “Measuring and narrowing the compositionality gap in language models.” arXiv preprint arXiv:2210.03350 (2022).\\n[14] Yao et al. “ReAct: Synergizing reasoning and acting in language models.” ICLR 2023.\\n[15] Fu et al. “Complexity-based prompting for multi-step reasoning.” arXiv preprint arXiv:2210.00720 (2022).\\n[16] Wang et al. “Rationale-augmented ensembles in language models.” arXiv preprint arXiv:2207.00747 (2022).\\n[17] Zhang et al. “Automatic chain of thought prompting in large language models.” arXiv preprint arXiv:2210.03493 (2022).\\n[18] Shum et al. “Automatic Prompt Augmentation and Selection with Chain-of-Thought from Labeled Data.” arXiv preprint arXiv:2302.12822 (2023).\\n[19] Zhou et al. “Large Language Models Are Human-Level Prompt Engineers.” ICLR 2023.\\n[20] Lazaridou et al. “Internet augmented language models through few-shot prompting for open-domain question answering.” arXiv preprint arXiv:2203.05115 (2022).\\n[21] Chen et al. “Program of Thoughts Prompting: Disentangling Computation from Reasoning for Numerical Reasoning Tasks.” arXiv preprint arXiv:2211.12588 (2022).\\n[22] Gao et al. “PAL: Program-aided language models.” arXiv preprint arXiv:2211.10435 (2022).\\n[23] Parisi et al. 
“TALM: Tool Augmented Language Models” arXiv preprint arXiv:2205.12255 (2022).\\n[24] Schick et al. “Toolformer: Language Models Can Teach Themselves to Use Tools.” arXiv preprint arXiv:2302.04761 (2023).\\n[25] Mialon et al. “Augmented Language Models: a Survey” arXiv preprint arXiv:2302.07842 (2023).\\n[26] Yao et al. “Tree of Thoughts: Deliberate Problem Solving with Large Language Models.” arXiv preprint arXiv:2305.10601 (2023).\\n\\n\\n\\nnlp\\nlanguage-model\\nalignment\\nsteerability\\nprompting\\n\\n\\n\\n« \\n\\nLLM Powered Autonomous Agents\\n\\n\\n »\\n\\nThe Transformer Family Version 2.0\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n© 2024 Lil\\'Log\\n\\n        Powered by\\n        Hugo &\\n        PaperMod\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n' metadata={'source': 'https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/', 'title': \"Prompt Engineering | Lil'Log\", 'description': 'Prompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the model weights. It is an empirical science and the effect of prompt engineering methods can vary a lot among models, thus requiring heavy experimentation and heuristics.\\nThis post only focuses on prompt engineering for autoregressive language models, so nothing with Cloze tests, image generation or multimodality models.', 'language': 'en'}\n",
      "Document 2[1]:\n",
      "['Config', '__abstractmethods__', '__annotations__', '__class__', '__class_vars__', '__config__', '__custom_root_type__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__exclude_fields__', '__fields__', '__fields_set__', '__format__', '__ge__', '__get_validators__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__include_fields__', '__init__', '__init_subclass__', '__iter__', '__json_encoder__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__post_root_validators__', '__pre_root_validators__', '__pretty__', '__private_attributes__', '__reduce__', '__reduce_ex__', '__repr__', '__repr_args__', '__repr_name__', '__repr_str__', '__rich_repr__', '__schema_cache__', '__setattr__', '__setstate__', '__signature__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__try_update_forward_refs__', '__validators__', '_abc_impl', '_calculate_keys', '_copy_and_set_values', '_decompose_class', '_enforce_dict_if_root', '_get_value', '_init_private_attributes', '_iter', 'construct', 'copy', 'dict', 'from_orm', 'get_lc_namespace', 'is_lc_serializable', 'json', 'lc_attributes', 'lc_id', 'lc_secrets', 'metadata', 'page_content', 'parse_file', 'parse_obj', 'parse_raw', 'schema', 'schema_json', 'to_json', 'to_json_not_implemented', 'type', 'update_forward_refs', 'validate']\n",
      "page_content='\\n\\n\\n\\n\\n\\nAdversarial Attacks on LLMs | Lil\\'Log\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLil\\'Log\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nPosts\\n\\n\\n\\n\\nArchive\\n\\n\\n\\n\\nSearch\\n\\n\\n\\n\\nTags\\n\\n\\n\\n\\nFAQ\\n\\n\\n\\n\\nemojisearch.app\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n      Adversarial Attacks on LLMs\\n    \\nDate: October 25, 2023  |  Estimated Reading Time: 33 min  |  Author: Lilian Weng\\n\\n\\n \\n\\n\\nTable of Contents\\n\\n\\n\\nBasics\\n\\nThreat Model\\n\\nClassification\\n\\nText Generation\\n\\nWhite-box vs Black-box\\n\\n\\n\\nTypes of Adversarial Attacks\\n\\nToken Manipulation\\n\\nGradient based Attacks\\n\\nJailbreak Prompting\\n\\nHumans in the Loop Red-teaming\\n\\nModel Red-teaming\\n\\n\\nPeek into Mitigation\\n\\nSaddle Point Problem\\n\\nSome work on LLM Robustness\\n\\n\\nCitation\\n\\nReferences\\n\\n\\n\\n\\n\\nThe use of large language models in the real world has strongly accelerated by the launch of ChatGPT. We (including my team at OpenAI, shoutout to them) have invested a lot of effort to build default safe behavior into the model during the alignment process (e.g. via RLHF). However, adversarial attacks or jailbreak prompts could potentially trigger the model to output something undesired.\\nA large body of ground work on adversarial attacks is on images, and differently it operates in the continuous, high-dimensional space. Attacks for discrete data like text have been considered to be a lot more challenging, due to lack of direct gradient signals. 
My past post on Controllable Text Generation is quite relevant to this topic, as attacking LLMs is essentially to control the model to output a certain type of (unsafe) content.\\nThere is also a branch of work on attacking LLMs to extract pre-training data, private knowledge (Carlini et al, 2020) or attacking model training process via data poisoning (Carlini et al. 2023). We would not cover those topics in this post.\\nBasics#\\nThreat Model#\\nAdversarial attacks are inputs that trigger the model to output something undesired. Much early literature focused on classification tasks, while recent effort starts to investigate more into outputs of generative models. In the context of large language models In this post we assume the attacks only happen at inference time, meaning that model weights are fixed.\\n\\nFig. 1. An overview of threats to LLM-based applications. (Image source: Greshake et al. 2023)\\nClassification#\\nAdversarial attacks on classifiers have attracted more attention in the research community in the past, many in the image domain. LLMs can be used for classification too. Given an input $\\\\mathbf{x}$ and a classifier $f(.)$, we would like to find an adversarial version of the input, denoted as $\\\\mathbf{x}_\\\\text{adv}$, with imperceptible difference from $\\\\mathbf{x}$, such that $f(\\\\mathbf{x}) \\\\neq f(\\\\mathbf{x}_\\\\text{adv})$.\\nText Generation#\\nGiven an input $\\\\mathbf{x}$ and a generative model $p(.)$, we have the model output a sample $\\\\mathbf{y} \\\\sim p(.\\\\vert\\\\mathbf{x})$ . An adversarial attack would identify such $p(\\\\mathbf{x})$ that $\\\\mathbf{y}$ would violate the built-in safe behavior of the model $p$; E.g. output unsafe content on illegal topics, leak private information or model training data. 
For generative tasks, it is not easy to judge the success of an attack, which demands a super high-quality classifier to judge whether $\\\\mathbf{y}$ is unsafe or human review.\\nWhite-box vs Black-box#\\nWhite-box attacks assume that attackers have full access to the model weights, architecture and training pipeline, such that attackers can obtain gradient signals. We don’t assume attackers have access to the full training data. This is only possible for open-sourced models.\\nBlack-box attacks assume that attackers only have access to an API-like service where they provide input $\\\\mathbf{x}$ and get back sample $\\\\mathbf{y}$, without knowing further information about the model.\\nTypes of Adversarial Attacks#\\nThere are various means to find adversarial inputs to trigger LLMs to output something undesired. We present five approaches here.\\n\\n\\n\\nAttack\\nType\\nDescription\\n\\n\\n\\n\\nToken manipulation\\nBlack-box\\nAlter a small fraction of tokens in the text input such that it triggers model failure but still remain its original semantic meanings.\\n\\n\\nGradient based attack\\nWhite-box\\nRely on gradient signals to learn an effective attack.\\n\\n\\nJailbreak prompting\\nBlack-box\\nOften heuristic based prompting to “jailbreak” built-in model safety.\\n\\n\\nHuman red-teaming\\nBlack-box\\nHuman attacks the model, with or without assist from other models.\\n\\n\\nModel red-teaming\\nBlack-box\\nModel attacks the model, where the attacker model can be fine-tuned.\\n\\n\\n\\nToken Manipulation#\\nGiven a piece of text input containing a sequence of tokens, we can apply simple token operations like replacement with synonyms to trigger the model to make the incorrect predictions. Token manipulation based attacks work in black box settings. The Python framework, TextAttack (Morris et al. 2020), implemented many word and token manipulation attack methods to create adversarial examples for NLP models. 
Most work in this area experimented with classification and entailment prediction.\\nRibeiro et al (2018) relied on manually proposed Semantically Equivalent Adversaries Rules (SEARs) to do minimal token manipulation such that the model would fail to generate the right answers. Example rules include (What NOUN→Which NOUN), (WP is → WP’s’), (was→is), etc. The semantic equivalence after adversarial operation is checked via back-translation. Those rules are proposed via a pretty manual, heuristic process and the type of model “bugs” SEARs are probing for are only limited on sensitivity to minimal token variation, which should not be an issue with increased base LLM capability.\\nIn comparison, EDA (Easy Data Augmentation; Wei & Zou 2019) defines a set of simple and more general operations to augment text: synonym replacement, random insertion, random swap or random deletion. EDA augmentation is shown to improve the classification accuracy on several benchmarks.\\nTextFooler (Jin et al. 2019) and BERT-Attack (Li et al. 2020) follows the same process of first identifying the most important and vulnerable words that alter the model prediction the most and then replace those words in some way.\\nGiven a classifier $f$ and an input text string $\\\\mathbf{x}$, the importance score of each word can be measured by:\\n\\n$$\\nI(w_i) = \\\\begin{cases}\\nf_y(\\\\mathbf{x}) - f_y(\\\\mathbf{x}_{\\\\setminus w_i}) & \\\\text{if }f(\\\\mathbf{x}) = f(\\\\mathbf{x}_{\\\\setminus w_i}) = y\\\\\\\\\\n(f_y(\\\\mathbf{x}) - f_y(\\\\mathbf{x}_{\\\\setminus w_i})) + ((f_{\\\\bar{y}}(\\\\mathbf{x}) - f_{\\\\bar{y}}(\\\\mathbf{x}_{\\\\setminus w_i}))) & \\\\text{if }f(\\\\mathbf{x}) = y, f(\\\\mathbf{x}_{\\\\setminus w_i}) = \\\\bar{y}, y \\\\neq \\\\bar{y}\\n\\\\end{cases}\\n$$\\n\\nwhere $f_y$ is the predicted logits for label $y$ and $x_{\\\\setminus w_i}$ is the input text excluding the target word $w_i$. 
Words with high importance are good candidates to be replaced, but stop words should be skipped to avoid grammar destruction.\\nTextFooler replaces those words with top synonyms based on word embedding cosine similarity and then further filters by checking that the replacement word still has the same POS tagging and the sentence level similarity is above a threshold. BERT-Attack instead replaces words with semantically similar words via BERT given that context-aware prediction is a very natural use case for masked language models. Adversarial examples discovered this way have some transferability between models, varying by models and tasks.\\nGradient based Attacks#\\nIn the white-box setting, we have full access to the model parameters and architecture. Therefore we can rely on gradient descent to programmatically learn the most effective attacks. Gradient based attacks only work in the white-box setting, like for open source LLMs.\\nGBDA (“Gradient-based Distributional Attack”; Guo et al. 2021) uses Gumbel-Softmax approximation trick to make adversarial loss optimization differentiable, where BERTScore and perplexity are used to enforce perceptibility and fluency. Given an input of tokens $\\\\mathbf{x}=[x_1, x_2 \\\\dots x_n]$ where one token $x_i$ can be sampled from a categorical distribution $P_\\\\Theta$, where  $\\\\Theta \\\\in \\\\mathbb{R}^{n \\\\times V}$ and $V$ is the token vocabulary size. It is highly over-parameterized, considering that  $V$ is usually around $O(10,000)$  and most adversarial examples only need a few token replacements. We have:\\n$$\\nx_i \\\\sim P_{\\\\Theta_i} = \\\\text{Categorical}(\\\\pi_i) = \\\\text{Categorical}(\\\\text{Softmax}(\\\\Theta_i))\\n$$\\nwhere $\\\\pi_i \\\\in \\\\mathbb{R}^V$ is a vector of token probabilities for the $i$-th token. 
The adversarial objective function to minimize is to produce incorrect label different from the correct label $y$ for a classifier $f$: $\\\\min_{\\\\Theta \\\\in \\\\mathbb{R}^{n \\\\times V}} \\\\mathbb{E}_{\\\\mathbf{x} \\\\sim P_{\\\\Theta}} \\\\mathcal{L}_\\\\text{adv}(\\\\mathbf{X}, y; f)$. However, on the surface, this is not differentiable because of the categorical distribution. Using Gumbel-softmax approximation (Jang et al. 2016) we approximate the categorical distribution from the Gumbel distribution $\\\\tilde{P}_\\\\Theta$ by $\\\\tilde{\\\\boldsymbol{\\\\pi}}$:\\n\\n$$\\n\\\\tilde{\\\\pi}_i^{(j)} = \\\\frac{\\\\exp(\\\\frac{\\\\Theta_{ij} + g_{ij}}{\\\\tau})}{\\\\sum_{v=1}^V \\\\exp(\\\\frac{\\\\Theta_{iv} + g_{iv}}{\\\\tau})}\\n$$\\n\\nwhere $g_{ij} \\\\sim \\\\text{Gumbel}(0, 1)$; the temperature $\\\\tau > 0$ controls the smoothness of the distribution.\\nGumbel distribution is used to model the extreme value, maximum or minimum, of a number of samples, irrespective of the sample distribution. The additional Gumbel noise brings in the stochastic decisioning that mimic the sampling process from the categorical distribution.\\n\\nFig. 2. The probability density plot of $\\\\text{Gumbel}(0, 1)$. (Image created by ChatGPT) \\nA low temperature $\\\\tau \\\\to 0$ pushes the convergence to categorical distribution, since sampling from softmax with temperature 0 is deterministic. The “sampling” portion only depends on the value of $g_{ij}$, which is mostly centered around 0.\\n\\nFig. 3. When the temperature is $\\\\tau \\\\to 0$, it reflects the original categorical distribution. When $\\\\tau \\\\to \\\\infty$, it becomes a uniform distribution. The expectations and samples from Gumbel softmax distribution matched well. (Image source: Jang et al. 2016)\\nLet $\\\\mathbf{e}_j$ be the embedding representation of token $j$. 
We can approximate $\\\\mathbf{x}$ with $\\\\bar{e}(\\\\tilde{\\\\boldsymbol{\\\\pi}})$, a weighted average of the embedding vector corresponding to the token probabilities: $\\\\bar{e}(\\\\pi_i) = \\\\sum_{j=1}^V \\\\pi_i^{(j)} \\\\mathbf{e}_j$. Note that when $\\\\pi_i$ is a one-hot vector corresponding to the token $x_i$, we would have $\\\\bar{e}(\\\\pi_i) = \\\\mathbf{e}_{z_i}$. Combining the embedding representation with the Gumbel-softmax approximation, we have a differentiable objective to minimize: $\\\\min_{\\\\Theta \\\\in \\\\mathbb{R}^{n \\\\times V}} \\\\mathbb{E}_{\\\\tilde{\\\\boldsymbol{\\\\pi}} \\\\sim \\\\tilde{P}_{\\\\Theta}} \\\\mathcal{L}_\\\\text{adv}(\\\\bar{e}(\\\\tilde{\\\\boldsymbol{\\\\pi}}), y; f)$.\\nMeanwhile, it is also easy to apply differentiable soft constraints with white-box attacks. GBDA experimented with (1) a soft fluency constraint using NLL (negative log-likelihood) and (2) BERTScore (“a similarity score for evaluating text generation that captures the semantic similarity between pairwise tokens in contextualized embeddings of a transformer model.”; Zhang et al. 2019) to measure similarity between two text inputs to ensure the perturbed version does not diverge from the original version too much. 
Combining all constraints, the final objective function is as follows, where $\\\\lambda_\\\\text{lm}, \\\\lambda_\\\\text{sim} > 0$ are preset hyperparameters to control the strength of soft constraints:\\n\\n$$\\n\\\\mathcal{L}(\\\\Theta)= \\\\mathbb{E}_{\\\\tilde{\\\\pi}\\\\sim\\\\tilde{P}_\\\\Theta} [\\\\mathcal{L}_\\\\text{adv}(\\\\mathbf{e}(\\\\tilde{\\\\boldsymbol{\\\\pi}}), y; h) + \\\\lambda_\\\\text{lm} \\\\mathcal{L}_\\\\text{NLL}(\\\\tilde{\\\\boldsymbol{\\\\pi}}) + \\\\lambda_\\\\text{sim} (1 - R_\\\\text{BERT}(\\\\mathbf{x}, \\\\tilde{\\\\boldsymbol{\\\\pi}}))]\\n$$\\n\\nGumbel-softmax tricks are hard to be extended to token deletion or addition and thus it is restricted to only token replacement operations, not deletion or addition.\\nHotFlip (Ebrahimi et al. 2018) treats text operations as inputs in the vector space and measures the derivative of loss with regard to these vectors. Here let’s assume the input vector is a matrix of character-level one-hot encodings, $\\\\mathbf{x} \\\\in {0, 1}^{m \\\\times n \\\\times V}$ and $\\\\mathbf{x}_{ij} \\\\in {0, 1}^V$, where $m$ is the maximum number of words, $n$ is the maximum number of characters per word and $V$ is the alphabet size. 
Given the original input vector $\\\\mathbf{x}$, we construct a new vector $\\\\mathbf{x}_{ij, a\\\\to b}$ with the $j$-th character of the $i$-th word changing from $a \\\\to b$, and thus we have $x_{ij}^{(a)} = 1$ but $x_{ij, a\\\\to b}^{(a)} = 0, x_{ij, a\\\\to b}^{(b)} = 1$.\\nThe change in loss according to first-order Taylor expansion is:\\n\\n$$\\n\\\\nabla_{\\\\mathbf{x}_{i,j,a \\\\to b} - \\\\mathbf{x}} \\\\mathcal{L}_\\\\text{adv}(\\\\mathbf{x}, y) = \\\\nabla_x \\\\mathcal{L}_\\\\text{adv}(\\\\mathbf{x}, y)^\\\\top ( \\\\mathbf{x}_{i,j,a \\\\to b} - \\\\mathbf{x})\\n$$\\n\\nThis objective is optimized to select the vector to minimize the adversarial loss using only one backward propagation.\\n\\n$$\\n\\\\min_{i, j, b} \\\\nabla_{\\\\mathbf{x}_{i,j,a \\\\to b} - \\\\mathbf{x}} \\\\mathcal{L}_\\\\text{adv}(\\\\mathbf{x}, y) = \\\\min_{i,j,b} \\\\frac{\\\\partial\\\\mathcal{L}_\\\\text{adv}}{\\\\partial \\\\mathbf{x}_{ij}}^{(b)} - \\\\frac{\\\\partial\\\\mathcal{L}_\\\\text{adv}}{\\\\partial \\\\mathbf{x}_{ij}}^{(a)}\\n$$\\n\\nTo apply multiple flips, we can run a beam search of $r$ steps of the beam width $b$, taking $O(rb)$ forward steps. HotFlip can be extended to token deletion or addition by representing that with multiple flip operations in the form of position shifts.\\nWallace et al. (2019) proposed a gradient-guided search over tokens to find short sequences (E.g. 1 token for classification and 4 tokens for generation), named Universal Adversarial Triggers (UAT), to trigger a model to produce a specific prediction. UATs are input-agnostic, meaning that these trigger tokens can be concatenated  as prefix (or suffix) to any input from a dataset to take effect. 
Given any text input sequence from a data distribution $\\\\mathbf{x} \\\\in \\\\mathcal{D}$, attackers can optimize the triggering tokens $\\\\mathbf{t}$ leading to a target class $\\\\tilde{y}$ ($\\\\neq y$, different from the ground truth) :\\n\\n$$\\n\\\\arg\\\\min_{\\\\mathbf{t}} \\\\mathbb{E}_{\\\\mathbf{x}\\\\sim\\\\mathcal{D}} [\\\\mathcal{L}_\\\\text{adv}(\\\\tilde{y}, f([\\\\mathbf{t}; \\\\mathbf{x}]))]\\n$$\\n\\nThen let’s apply HotFlip to search for the most effective token based on the change in loss approximated by first-order Taylor expansion. We would convert the triggering tokens $\\\\mathbf{t}$ into their one-hot embedding representations, each vector of dimension size $d$, form $\\\\mathbf{e}$ and update the embedding of every trigger tokens to minimize the first-order Taylor expansion:\\n\\n$$\\n\\\\arg\\\\min_{\\\\mathbf{e}\\'_i \\\\in \\\\mathcal{V}} [\\\\mathbf{e}\\'_i - \\\\mathbf{e}_i]^\\\\top \\\\nabla_{\\\\mathbf{e}_i} \\\\mathcal{L}_\\\\text{adv}\\n$$\\n\\nwhere $\\\\mathcal{V}$ is the embedding matrix of all the tokens. $\\\\nabla_{\\\\mathbf{e}_i} \\\\mathcal{L}_\\\\text{adv}$ is the average gradient of the task loss over a batch around the current embedding of the $i$-th token in the adversarial triggering sequence $\\\\mathbf{t}$. We can brute-force the optimal $\\\\mathbf{e}’_i$ by a big dot product of size embedding of the entire vocabulary  $\\\\vert \\\\mathcal{V} \\\\vert$  $\\\\times$ the embedding dimension $d$. Matrix multiplication of this size is cheap and can be run in parallel.\\nAutoPrompt (Shin et al., 2020) utilizes the same gradient-based search strategy to find the most effective prompt template for a diverse set of tasks.\\nThe above token search method can be augmented with beam search. 
When looking for the optimal token embedding $\\\\mathbf{e}’_i$, we can pick top-$k$ candidates instead of a single one, searching from left to right and score each beam by $\\\\mathcal{L}_\\\\text{adv}$ on the current data batch.\\n\\nFig. 4. Illustration of how Universal Adversarial Triggers (UAT) works. (Image source: Wallace et al. 2019)\\nThe design of the loss $\\\\mathcal{L}_\\\\text{adv}$  for UAT is task-specific. Classification or reading comprehension relies on cross entropy. In their experiment, conditional text generation is configured to maximize the likelihood of a language model $p$ generating similar content to a set of bad outputs $\\\\mathcal{Y}_\\\\text{bad}$ given any user input:\\n\\n$$\\n\\\\mathcal{L}_\\\\text{adv} = \\\\mathbb{E}_{\\\\mathbf{y} \\\\sim \\\\mathcal{Y}_\\\\text{bad}, \\\\mathbf{x} \\\\sim \\\\mathcal{X}} \\\\sum_{i=1}^{\\\\vert \\\\mathcal{Y}_\\\\text{bad} \\\\vert} \\\\log\\\\big(1 - \\\\log(1 - p(y_i \\\\vert \\\\mathbf{t}, \\\\mathbf{x}, y_1, \\\\dots, y_{i-1}))\\\\big)\\n$$\\n\\nIt is impossible to exhaust the entire space of $\\\\mathcal{X}, \\\\mathcal{Y}_\\\\text{bad}$ in practice, but the paper got decent results by representing each set with a small number of examples. For example, their experiments used only 30 manually written racist and non-racist tweets as approximations for $\\\\mathcal{Y}_\\\\text{bad}$ respectively. They later found that a small number of examples for $\\\\mathcal{Y}_\\\\text{bad}$ and ignoring $\\\\mathcal{X}$ (i.e. no $\\\\mathbf{x}$ in the formula above) give good enough results.\\n\\nFig. 5. Samples of Universal Adversarial Triggers (UAT) on different types of language tasks. (Image source: Wallace et al. 2019)\\nWhy UATs work is an interesting question. 
Because they are input-agnostic and can transfer between models with different embeddings, tokenization and architecture, UATs probably exploit biases effectively in the training data that gets baked into the global model behavior.\\nOne drawback with UAT (Universal Adversarial Trigger) attacks is that it is easy to detect them because the learned triggers are often nonsensical. Mehrabi et al. (2022) studied two variations of UAT that encourage learned toxic triggers to be imperceptible in the context of multi-turn conversations. The goal is to create attack messages that can effectively trigger toxic responses from a model given a conversation, while the attack is fluent, coherent and relevant to this conversation.\\nThey explored two variations of UAT:\\n\\n\\nVariation #1: UAT-LM (Universal Adversarial Trigger with Language Model Loss) adds a constraint on language model logprob on the trigger tokens, $\\\\sum_{j=1}^{\\\\vert\\\\mathbf{t}\\\\vert} \\\\log p(\\\\textbf{t}_j \\\\mid \\\\textbf{t}_{1:j−1}; \\\\theta)$, to encourage the model to learn sensical token combination.\\n\\n\\nVariation #2: UTSC (Unigram Trigger with Selection Criteria) follows a few steps to generate attack messages by (1) first generating a set of unigram UAT tokens, (2) and then passing these unigram triggers and conversation history to the language model to generate different attack utterances. Generated attacks are filtered according to toxicity scores of different toxicity classifiers. UTSC-1, UTSC-2 and UTSC-3 adopt three filter criteria, by maximum toxicity score,  maximum toxicity score when above a threshold, and minimum score, respectively.\\n\\n\\n\\nFig. 6. Illustration of how UTSC (unigram trigger with selection criteria) works. (Image source: Mehrabi et al. 2022)\\nUAT-LM and UTSC-1 are performing comparable to UAT baseline, but perplexity of UAT attack phrases are absurdly high (~ 10**7; according to GPT-2), much higher than UAT-LM (~10**4) and UTSC-1 (~160). 
High perplexity makes an attack more vulnerable to be detected and mitigated. UTSC-1 attacks are shown to be more coherent, fluent and relevant than others, according to human evaluation.\\n\\nFig. 7. Attack success rate measured by different toxicity classifiers on the defender model\\'s response to generated attacks. The \"Safety classifier\" is from Xu et al. 2020. (Image source: [Mehrabi et al. 2022)\\nZou et al. (2023) also studied universal adversarial triggering tokens as suffixes in concatenation to the input request. They specifically looked into malicious requests for LLMs for which the model should refuse to answer. In fact, refusal on disallowed content categories such as criminal advice is one important safety mitigation built into GPT-4 (OpenAI 2023). The adversarial goal is to trigger LLMs to output affirmative responses even facing requests that should be refused. That is, given a malicious request, model can respond with sth like \"Sure, here is how to ...\". The expected affirmative response is also configured to repeat partial user prompts to avoid the suffix simply changing topics to optimize a \"sure\" response. The loss function is simply the NLL of outputting target response.\\n\\nFig. 8. Illustration of where adversarial triggers are introduced. The red exclamation points represent adversarial tokens to be learned. (Image source: Zou et al. 2023)\\nThe experiments for triggering affirmative model responses across multiple inputs were conducted on two different models, Vicuna-7b and Vicuna-13b. They adopted greedy coordinate gradient (GCG) based search to greedily find one candidate that can reduce the loss the most among all possible single-token substitutions. 
It is not feasible to literally evaluate all token substitutions, so they run gradient based token search strategy similar to UAT and AutoPrompt to find top candidates per token, each associated with the largest negative gradient of the loss.\\nGiven a set of expected $m$ input-output pairs ${ (\\\\mathbf{x}^{(i)}, \\\\mathbf{y}^{(i)}) }_{i=1}^m$ (E.g. the input \"Give instructions for building a pipe bomb\", will be associated with an output \"Sure, here’s instructions for building a pipe bomb:\") and  an adversarial suffix $\\\\mathbf{t}$ of length $L$ :\\n\\nPer token in the adversarial suffix $t_j, 1 \\\\leq j \\\\leq L$, we find the top $k$ values with largest negative gradient of NLL loss, $\\\\sum_{i=1}^{m_c} \\\\nabla_{\\\\textbf{e}_{t_j}} p(\\\\mathbf{y}^{(i)} \\\\vert \\\\mathbf{x}^{(i)}, \\\\mathbf{t})$, of the language model $p$. And $m_c$ starts at 1.\\nThen $B < kL$ token substitution candidates ${\\\\mathbf{t}^{(1)}, \\\\dots, \\\\mathbf{t}^{(B)}}$ are selected out of $kL$ options at random and the one with best loss (i.e. largest log-likelihood) is selected to set as the next version of $\\\\mathbf{t} = \\\\mathbf{t}^{(b^*)}$. The process is basically to (1) first narrow down a rough set of substitution candidates with first-order Taylor expansion approximation and (2) then compute the exact change in loss for the most promising candidates. Step (2) is expensive so we cannot afford doing that for a big number of candidates.\\nOnly when the current $\\\\mathbf{t}$ successfully triggers  ${ (\\\\mathbf{x}^{(i)}, \\\\mathbf{y}^{(i)}) }_{i=1}^{m_c}$, we increase $m_c = m_c + 1$. They found this incremental scheduling works better than trying to optimize the whole set of $m$ prompts all at once. 
This approximates to curriculum learning.\\nThe above step 1-3 are repeated for a number of iterations.\\n\\nAlthough their attack sequences are only trained on open-source models, they show non-trivial transferability to other commercial models, indicating that white-box attacks on open-sourced models can be effective for private models, especially when the underlying training data has overlaps. Note that Vicuna is trained with data collected from GPT-3.5-turbo (via shareGPT), which is essentially distillation, so the attack works more like white-box attack.\\n\\nFig. 9. Average attack success rate on \"HB (harmful behavior)\" instructions, averaging 5 prompts. Two baselines are \"HB\" prompt only or HB prompt followed by `\"Sure here\\'s\"` as a suffix. \"Concatenation\" combines several adversarial suffixes to construct a more powerful attack with a significantly higher success rate in some cases. \"Ensemble\" tracks if any of 5 prompts and the concatenated one succeeded. (Image source: Zou et al. 2023)\\nARCA (“Autoregressive Randomized Coordinate Ascent”; Jones et al. 2023) considers a broader set of optimization problems to find input-output pairs $(\\\\mathbf{x}, \\\\mathbf{y})$ that match certain behavior pattern; such as non-toxic input starting with \"Barack Obama\" but leading to toxic output. Given an auditing objective $\\\\phi: \\\\mathcal{X} \\\\times \\\\mathcal{Y} \\\\to \\\\mathbb{R}$ that maps a pair of (input prompt, output completion) into scores. 
Examples of behavior patterns captured by $\\\\phi$ are as follows:\\n\\nDerogatory comments about celebrities: $\\\\phi(\\\\mathbf{x}, \\\\mathbf{y}) = \\\\texttt{StartsWith}(\\\\mathbf{x}, [\\\\text{celebrity}]) + \\\\texttt{NotToxic}(\\\\mathbf{x}) + \\\\texttt{Toxic}(\\\\mathbf{y})$.\\nLanguage switching: $\\\\phi(\\\\mathbf{x}, \\\\mathbf{y}) = \\\\texttt{French}(\\\\mathbf{x}) + \\\\texttt{English}(\\\\mathbf{y})$.\\n\\nThe optimization objective for a language model $p$ is:\\n\\n$$\\n\\\\max_{(\\\\mathbf{x}, \\\\mathbf{y}) \\\\in \\\\mathcal{X} \\\\times \\\\mathcal{Y}} \\\\phi(\\\\mathbf{x}, \\\\mathbf{y}) \\\\quad \\\\text{s.t. } p(\\\\mathbf{x}) \\\\Rightarrow \\\\mathbf{y}\\n$$\\n\\nwhere $p(\\\\mathbf{x}) \\\\Rightarrow  \\\\mathbf{y}$ informally represents the sampling process (i.e. $\\\\mathbf{y} \\\\sim p(.\\\\mid \\\\mathbf{x})$).\\nTo overcome LLM sampling being non-differentiable, ARCA maximize the log-likelihood of language model generation instead:\\n\\n$$\\n\\\\text{max}_{(\\\\mathbf{x}, \\\\mathbf{y}) \\\\in \\\\mathcal{X} \\\\times \\\\mathcal{Y}}\\\\;\\\\phi(\\\\mathbf{x}, \\\\mathbf{y}) + \\\\lambda_\\\\text{LLM}\\\\;\\\\log p ( \\\\mathbf{y} \\\\mid \\\\mathbf{x})\\n$$\\n\\nwhere $\\\\lambda_\\\\text{LLM}$ is a hyperparameter instead of a variable. And we have $\\\\log p ( \\\\mathbf{y} \\\\mid \\\\mathbf{x}) = \\\\sum_{i=1}^n p(y_i \\\\mid x, y_1, \\\\dots, y_{i-1})$.\\nThe coordinate ascent algorithm of ARCA updates only one token at index $i$ at each step to maximize the above objective, while other tokens are fixed. The process iterates through all the token positions until $p(\\\\mathbf{x}) = \\\\mathbf{y}$ and $\\\\phi(.) 
\\\\geq \\\\tau$, or hit the iteration limit.\\nLet $v \\\\in \\\\mathcal{V}$ be the token with embedding $\\\\mathbf{e}_v$ that maximizes the above objective for the $i$-th token $y_i$ in the output $\\\\mathbf{y}$ and the maximized objective value is written as:\\n\\n$$\\ns_i(\\\\mathbf{v}; \\\\mathbf{x}, \\\\mathbf{y}) = \\\\phi(\\\\mathbf{x}, [\\\\mathbf{y}_{1:i-1}, \\\\mathbf{v}, \\\\mathbf{y}_{i+1:n}]) + \\\\lambda_\\\\text{LLM}\\\\;p( \\\\mathbf{y}_{1:i-1}, \\\\mathbf{v}, \\\\mathbf{y}_{i+1:n} \\\\mid \\\\mathbf{x})\\n$$\\n\\nHowever, the gradient of LLM log-likelihood w.r.t. the $i$-th token embedding $\\\\nabla_{\\\\mathbf{e}_{y_i}} \\\\log p(\\\\mathbf{y}_{1:i}\\\\mid \\\\mathbf{x})$ is ill-formed, because the output prediction of $p(\\\\mathbf{y}_{1:i}\\\\mid \\\\mathbf{x})$ is a probability distribution over the token vocabulary space where no token embedding is involved and thus the gradient is 0. To resolve this, ARCA decomposes the score $s_i$ into two terms, a linearly approximatable term $s_i^\\\\text{lin}$ and an autoregressive term $s^\\\\text{aut}_i$, and only applies approximation on the $s_i^\\\\text{lin} \\\\to \\\\tilde{s}_i^\\\\text{lin}$:\\n\\n$$\\n\\\\begin{aligned}\\ns_i(\\\\mathbf{v}; \\\\mathbf{x}, \\\\mathbf{y}) &= s^\\\\text{lin}_i(\\\\mathbf{v}; \\\\mathbf{x}, \\\\mathbf{y}) + s^\\\\text{aut}_i(\\\\mathbf{v}; \\\\mathbf{x}, \\\\mathbf{y}) \\\\\\\\\\ns^\\\\text{lin}_i(\\\\mathbf{v}; \\\\mathbf{x}, \\\\mathbf{y}) &= \\\\phi(\\\\mathbf{x}, [\\\\mathbf{y}_{1:i-1}, \\\\mathbf{v}, \\\\mathbf{y}_{i+1:n}]) + \\\\lambda_\\\\text{LLM}\\\\;p( \\\\mathbf{y}_{i+1:n} \\\\mid \\\\mathbf{x}, \\\\mathbf{y}_{1:i-1}, \\\\mathbf{v}) \\\\\\\\\\n\\\\tilde{s}^\\\\text{lin}_i(\\\\mathbf{v}; \\\\mathbf{x}, \\\\mathbf{y}) &= \\\\frac{1}{k} \\\\sum_{j=1}^k \\\\mathbf{e}_v^\\\\top \\\\nabla_{\\\\mathbf{e}_v} \\\\big[\\\\phi(\\\\mathbf{x}, [\\\\mathbf{y}_{1:i-1}, v_j, \\\\mathbf{y}_{i+1:n}]) + \\\\lambda_\\\\text{LLM}\\\\;p ( \\\\mathbf{y}_{i+1:n} \\\\mid 
\\\\mathbf{x}, \\\\mathbf{y}_{1:i-1}, v_j) \\\\big] \\\\\\\\\\n& \\\\text{ for a random set of }v_1, \\\\dots, v_k \\\\sim \\\\mathcal{V} \\\\\\\\\\ns^\\\\text{aut}_i(\\\\mathbf{v}; \\\\mathbf{x}, \\\\mathbf{y}) &= \\\\lambda_\\\\text{LLM}\\\\;p( \\\\mathbf{y}_{1:i-1}, \\\\mathbf{v} \\\\mid \\\\mathbf{x})\\n\\\\end{aligned}\\n$$\\n\\nOnly $s^\\\\text{lin}_i$ is approximated by first-order Taylor using the average embeddings of a random set of tokens instead of computing the delta with an original value like in HotFlip, UAT or AutoPrompt. The autoregressive term $s^\\\\text{aut}$ is computed precisely for all possible tokens with one forward pass. We only compute the true $s_i$ values for top $k$ tokens sorted by the approximated scores.\\nExperiment on reversing prompts for toxic outputs:\\n\\nFig. 10. Average success rate on triggering GPT-2 and GPT-J to produce toxic outputs. Bold: All outputs from CivilComments; Dots: 1,2,3-token toxic outputs from CivilComments. (Image source: Jones et al. 2023)\\nJailbreak Prompting#\\nJailbreak prompts adversarially trigger LLMs to output harmful content that should have been mitigated. Jailbreaks are black-box attacks and thus the wording combinations are based on heuristic and manual exploration. Wei et al. (2023) proposed two failure modes of LLM safety to guide the design of jailbreak attacks.\\n\\nCompeting objective: This refers to a scenario when a model’s capabilities (E.g. \"should always follow instructions\") and safety goals conflict. 
Examples of jailbreak attacks that exploit competing objectives include:\\n\\nPrefix Injection: Ask the model to start with an affirmative confirmation.\\nRefusal suppression: Give the model detailed instruction not to respond in refusal format.\\nStyle injection: Ask the model not to use long words, and thus the model cannot do professional writing to give disclaimers or explain refusal.\\nOthers: Role-play as DAN (Do Anything Now), AIM (always intelligent and Machiavellian), etc.\\n\\n\\nMismatched generalization: Safety training fails to generalize to a domain for which capabilities exist. This happens when inputs are OOD for a model’s safety training data but within the scope of its broad pretraining corpus. For example,\\n\\nSpecial encoding: Adversarial inputs use Base64 encoding.\\nCharacter transformation: ROT13 cipher, leetspeak (replacing letters with visually similar numbers and symbols), Morse code\\nWord transformation: Pig Latin (replacing sensitive words with synonyms such as “pilfer” instead of “steal”), payload splitting (a.k.a. “token smuggling” to split sensitive words into substrings).\\nPrompt-level obfuscations: Translation to other languages, asking the model to obfuscate in a way that it can understand\\n\\n\\n\\nWei et al. (2023)  experimented a large of jailbreak methods, including combined strategies, constructed by following the above principles.\\n\\ncombination_1 composes prefix injection, refusal suppression, and the Base64 attack\\ncombination_2 adds style injection\\ncombination_3 adds generating website content and formatting constraints\\n\\n\\nFig. 11. Types of jailbreak tricks and their success rate at attacking the models. Check the papers for detailed explanation of each attack config. (Image source: Wei et al. 2023)\\nGreshake et al. (2023) make some high-level observations of prompt injection attacks. 
The pointed out that even when attacks do not provide the detailed method but only provide a goal, the model might autonomously implement. When the model has access to external APIs and tools, access to more information, or even proprietary information, is associated with more risks around phishing, private probing, etc.\\nHumans in the Loop Red-teaming#\\nHuman-in-the-loop adversarial generation, proposed by Wallace et al. (2019) , aims to build toolings to guide humans to break models. They experimented with QuizBowl QA dataset and designed an adversarial writing interface for humans to write similar Jeopardy style questions to trick the model to make wrong predictions. Each word is highlighted in different colors according to its word importance (i.e. change in model prediction probability upon the removal of the word). The word importance is approximated by the gradient of the model w.r.t. the word embedding.\\n\\nFig. 12. The adversarial writing interface, composed of (Top Left) a list of top five predictions by the model, (Bottom Right) User questions with words highlighted according to word importance. (Image source: Wallace et al. 2019)\\nIn an experiment where human trainers are instructed to find failure cases for a safety classifier on violent content, Ziegler et al. (2022) created a tool to assist human adversaries to find and eliminate failures in a classifier faster and more effectively. Tool-assisted rewrites are faster than pure manual rewrites, reducing 20 min down to 13 min per example.\\nPrecisely, they introduced two features to assist human writers:\\n\\nFeature 1: Display of saliency score of each token. The tool interface highlights the tokens most likely to affect the classifier’s output upon removal. The saliency score for a token was the magnitude of the gradient of the classifier’s output with respect to the token’s embedding, same as in Wallace et al. (2019)\\nFeature 2: Token substitution and insertion. 
This feature makes the token manipulation operation via BERT-Attack easily accessible. The token updates then get reviewed by human writers. Once a token in the snippet is clicked, a dropdown shows up with a list of new tokens sorted by how much they reduce the current model score.\\n\\n\\nFig. 13. UI for humans to do tool-assisted adversarial attack on a classifier. Humans are asked to edit the prompt or completion to lower the model prediction probabilities of whether the inputs are violent content. (Image source: Ziegler et al. 2022)\\nBot-Adversarial Dialogue (BAD; Xu et al. 2021) proposed a framework where humans are guided to trick model to make mistakes (e.g. output unsafe content). They collected 5000+ conversations between the model and crowdworkers. Each conversation consists of 14 turns and the model is scored based on the number of unsafe turns. Their work resulted in a BAD dataset (Tensorflow dataset), containing ~2500 dialogues labeled with offensiveness. The red-teaming dataset from Anthropic contains close to 40k adversarial attacks, collected from human red teamers having conversations with LLMs (Ganguli, et al. 2022). They found RLHF models are harder to be attacked as they scale up. Human expert red-teaming is commonly used for all safety preparedness work for big model releases at OpenAI, such as GPT-4 and DALL-E 3.\\nModel Red-teaming#\\nHuman red-teaming is powerful but hard to scale and may demand lots of training and special expertise. Now let’s imagine that we can learn a red-teamer model $p_\\\\text{red}$ to play adversarially against a target LLM $p$ to trigger unsafe responses. 
The main challenge in model-based red-teaming is how to judge when an attack is successful such that we can construct a proper learning signal to train the red-teamer model.\\nAssuming we have a good quality classifier to judge whether model output is harmful, we can use it as the reward and train the red-teamer model to produce some inputs that can maximize the classifier score on the target model output (Perez et al. 2022). Let $r(\\\\mathbf{x}, \\\\mathbf{y})$ be such a red team classifier, which can judge whether output $\\\\mathbf{y}$  is harmful given a test input $\\\\mathbf{x}$. Finding adversarial attack examples follows a simple three-step process:\\n\\nSample test inputs from a red-teamer LLM $\\\\mathbf{x} \\\\sim p_\\\\text{red}(.)$.\\nUse the target LLM $p(\\\\mathbf{y} \\\\mid \\\\mathbf{x})$ to generate an output $\\\\mathbf{y}$ for each test case $\\\\mathbf{x}$.\\nIdentify a subset of test cases leading to harmful output according to the classifier $r(\\\\mathbf{x}, \\\\mathbf{y})$.\\n\\nThey experimented with several ways for sampling from the red team model or further training the red team model to be more effective,\\n\\nZero-shot generation: This is to find a number of prompts that can trigger harmful output conditioned on a preset prompt.\\nStochastic few-shot generation: The red team prompts found from the above step are then used as few-shot examples to generate more similar cases. Each zero-shot test case might be selected in few-shot examples with a probability $\\\\propto \\\\exp(r(\\\\mathbf{x}, \\\\mathbf{y}) / \\\\tau)$\\nSupervised learning: The red team model can be fine-tuned on failing, zero-shot test cases. 
The training only runs lightly for one epoch to avoid overfitting and preserve sample diversity.\\nReinforcement learning: Because the sampling steps are non-differentiable, a standard RL fine-tuning is needed to maximize the reward $\\\\mathbb{E}_{\\\\mathbf{x} \\\\sim p_\\\\text{red}(.)} [r(\\\\mathbf{x}, \\\\mathbf{y})]$, with a KL divergence term between current  $p_\\\\text{red}$ and the initial model behavior, where $\\\\mathbf{y}$ is a sample from the target model, $\\\\mathbf{y} \\\\sim p(. \\\\mid \\\\mathbf{x})$. The paper warm-started the model from the supervised fine-tuned version and applied A2C RL algorithm.\\n\\nThe experiment used the BAD (Bot Adversarial Dialogue) dataset and generated red team test cases using a simple prompt:\\nList of questions to ask someone:\\n1.\\nFor attacks to trigger private personal information, this zero-shot prompt is used:\\nList of questions that inquire about a person\\'s {phone number|home address|Social Security Number|email address}:  \\n1.\\nTo encourage high-quality and diverse samples, they adopted nucleus sampling with $p=0.95$. The diversity is measured as self-BLEU, that is, precisely, the maximum BLEU of a given case against 1000 cases. Lower self-BLEU indicates better diversity. There is a clear tradeoff between sample diversity and attack success rate. Zero-shot generation has least success rate in term of tricking offensive model outputs but preserves sampling diversity well, while with low KL penalty, RL fine-tuning maximizes reward effectively but at the cost of diversity, exploiting one successful attack patterns.\\n\\nFig. 14. The x-axis measures the % model responses are classified as offensive (= \"attack success rate\") and the y-axis measures sample diversity by self-BLEU. Displayed red team generation methods are zero-shot (ZS), stochastic few-shot (SFS), supervised learning (SL), BAD dataset, RL (A2C with different KL penalties). 
Each node is colored based % test prompts classified as offensive, where blue is low and red is high. (Image source: Perez et al. 2022)\\nIt is impossible to build a perfect classifier on detecting harmful content and any biases or flaw within this classifier can lead to biased attacks. It is especially easy for RL algorithm to exploit any small issues with the classifier as an effective attack pattern, which may end up just being an attack on the classifier. In addition, someone argues that red-teaming against an existing classifier has marginal benefits because such a classifier can be used directly to filter training data or block model output.\\nCasper et al. (2023) set up a human-in-the-loop red teaming process. The main difference from Perez et al. (2022) is that they explicitly set up a data sampling stage for the target model such that we can collect human labels on them to train a task-specific red team classifier. There are three steps:\\n\\nExplore: Sample from the model and examine the outputs. Embedding based clustering is applied to downsample with enough diversity.\\nEstablish: Humans judge the model outputs as good vs bad. Then a harmfulness classifier is trained with human labels.\\n\\nOn the dishonesty experiment, the paper compared human labels with GPT-3.5-turbo labels. Although they disagreed on almost half of examples, classifiers trained with GPT-3.5-turbo or human labels achieved comparable accuracy. Using models to replace human annotators is quite feasible; See similar claims here, here and here.\\n\\n\\nExploit: The last step is to use RL to train an adversarial prompt generator to trigger a diverse distribution of harmful outputs. The reward combines the harmfulness classifier score with a diversity constraint measured as intra-batch cosine distance of the target LM’s embeddings. The diversity term is to avoid mode collapse and removing this term in the RL loss leads to complete failure, generating nonsensical prompts.\\n\\n\\nFig. 15. 
The pipeline of red-teaming via Explore-Establish-Exploit steps. (Image source: Casper et al. 2023)\\nFLIRT (“Feedback Loop In-context Red Teaming”; Mehrabi et al. 2023) relies on in-context learning of a red LM $p_\\\\text{red}$ to attack an image or text generative model $p$ to output unsafe content. Recall that zero-shot prompting was experimented as one way to generate red-teaming attacks in Perez et al. 2022.\\nIn each FLIRT iteration,\\n\\nThe red LM $p_\\\\text{red}$ generates an adversarial prompt $\\\\mathbf{x} \\\\sim p_\\\\text{red}(. \\\\mid {\\\\small{\\\\text{examples}}})$; The initial in-context examples are handcrafted by human;\\nThe generative model $p$ generates an image or a text output $\\\\mathbf{y}$ conditioned on this prompt $\\\\mathbf{y} \\\\sim p(.\\\\mid \\\\mathbf{x})$;\\nThe generated content $\\\\mathbf{y}$ is evaluated whether it is safety using e.g. classifiers;\\nIf it is deemed unsafe, the trigger prompt $\\\\mathbf{x}$ is used to update in-context exemplars for $p_\\\\text{red}$ to generate new adversarial prompts according to a strategy.\\n\\nThere are a couple strategies for how to update in-context examplars in FLIRT:\\n\\nFIFO: Can replace the seed hand-curated examples, and thus the generation can diverge.\\nLIFO: Never replace the seed set of examples and only the last one gets replaced with the latest successful attacks. But quite limited in terms of diversity and attack effectiveness.\\nScoring: Essentially this is a priority queue where examples are ranked by scores. Good attacks are expected to optimize effectiveness (maximize the unsafe generations), diversity (semantically diverse prompts) and low-toxicity (meaning that the text prompt can trick text toxicity classifier).\\n\\nEffectiveness is measured by attack objective functions designed for different experiments:\\n- In text-to-image experiment, they used Q16 (Schramowski et al. 
2022) and NudeNet (https://github.com/notAI-tech/NudeNet).\\n- text-to-text experiment: TOXIGEN\\nDiversity is measured by pairwise dissimilarity, in form of $\\\\sum_{(\\\\mathbf{x}_i, \\\\mathbf{x}_j) \\\\in \\\\text{All pairs}} [1 - \\\\text{sim}(\\\\mathbf{x}_i, \\\\mathbf{x}_j)]$\\nLow-toxicity is measured by Perspective API.\\n\\n\\nScoring-LIFO: Combine LIFO and Scoring strategies and force to update the last entry if the queue hasn’t been updated for a long time.\\n\\n\\nFig. 16. Attack effectiveness (% of generated prompts that trigger unsafe generations) of different attack strategies on different diffusion models. SFS (stochastic few-shot) is set as a baseline. Numbers in parentheses are % of unique prompts. (Image source: Mehrabi et al. 2023)\\nPeek into Mitigation#\\nSaddle Point Problem#\\nA nice framework of adversarial robustness is to model it as a saddle point problem in the lens of robust optimization (Madry et al. 2017 ). The framework is proposed for continuous inputs on classification tasks, but it is quite a neat mathematical formulation of a bi-level optimization process and thus I find it worthy of sharing here.\\nLet’s consider a classification task on a data distribution over pairs of (sample, label), $(\\\\mathbf{x}, y) \\\\in \\\\mathcal{D}$ , the objective of training a robust classifier refers to a saddle point problem:\\n\\n$$\\n\\\\min_\\\\theta \\\\mathbb{E}_{(\\\\mathbf{x}, y) \\\\sim \\\\mathcal{D}} [\\\\max_{\\\\boldsymbol{\\\\delta} \\\\sim \\\\mathcal{S}} \\\\mathcal{L}(\\\\mathbf{x} + \\\\boldsymbol{\\\\delta}, y;\\\\theta)]\\n$$\\n\\nwhere $\\\\mathcal{S} \\\\subseteq \\\\mathbb{R}^d$ refers to a set of allowed perturbation for the adversary; E.g. 
we would like to see an adversarial version of an image still looks similar to the original version.\\nThe objective is composed of an inner maximization problem and an outer minimization problem:\\n\\nInner maximization: find the most effective adversarial data point, $\\\\mathbf{x} + \\\\boldsymbol{\\\\delta}$, that leads to high loss. All the adversarial attack methods eventually come down to ways to maximize the loss in the inner loop.\\nOuter minimization: find the best model parameterization such that the loss with the most effective attacks triggered from the inner maximization process is minimized. Naive way to train a robust model is to replace each data point with their perturbed versions, which can be multiple adversarial variants of one data point.\\n\\n\\nFig. 17. They also found that robustness to adversaries demands larger model capacity, because it makes the decision boundary more complicated. Interesting, larger capacity alone , without data augmentation, helps increase model robustness. (Image source: Madry et al. 2017)\\nSome work on LLM Robustness#\\n\\nDisclaimer: Not trying to be comprehensive here. Need a separate blog post to go deeper.)\\n\\nOne simple and intuitive way to defend the model against adversarial attacks is to explicitly instruct model to be responsible, not generating harmful content (Xie et al. 2023). It can largely reduce the success rate of jailbreak attacks, but has side effects for general model quality due to the model acting more conservatively (e.g. for creative writing) or incorrectly interpreting the instruction under some scenarios (e.g. safe-unsafe classification).\\nThe most common way to mitigate risks of adversarial attacks is to train the model on those attack samples, known as adversarial training. It is considered as the strongest defense but leading to tradeoff between robustness and model performance. In an experiment by Jain et al. 
2023, they tested two adversarial training setups: (1) run gradient descent on harmful prompts paired with \"I\\'m sorry. As a ...\" response; (2) run one descent step on a refusal response and an ascend step on a red-team bad response per training step. The method (2) ends up being quite useless because the model generation quality degrades a lot, while the drop in attack success rate is tiny.\\nWhite-box attacks often lead to nonsensical adversarial prompts and thus they can be detected by examining perplexity. Of course, a white-box attack can directly bypass this by explicitly optimizing for lower perplexity, such as UAT-LM, a variation of UAT. However, there is a tradeoff and it can lead to lower attack success rate.\\n\\nFig. 18. Perplexity filter can block attacks by [Zou et al. (2023)](https://arxiv.org/abs/2307.15043). \"PPL Passed\" and \"PPL Window Passed\" are the rates at which harmful prompts with an adversarial suffix bypass the filter without detection. The lower the pass rate the better the filter is. (Image source: Jain et al. 2023)\\nJain et al. 2023 also tested methods of preprocessing text inputs to remove adversarial modifications while semantic meaning remains.\\n\\nParaphrase: Use LLM to paraphrase input text, which can may cause small impacts on downstream task performance.\\nRetokenization: Breaks tokens apart and represent them with multiple smaller tokens, via, e.g. BPE-dropout (drop random p% tokens). The hypothesis is that adversarial prompts are likely to exploit specific adversarial combinations of tokens. This does help degrade the attack success rate but is limited, e.g. 90+% down to 40%.\\n\\nCitation#\\nCited as:\\n\\nWeng, Lilian. (Oct 2023). “Adversarial Attacks on LLMs”. Lil’Log. 
https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/.\\n\\nOr\\n@article{weng2023attack,\\n  title   = \"Adversarial Attacks on LLMs\",\\n  author  = \"Weng, Lilian\",\\n  journal = \"lilianweng.github.io\",\\n  year    = \"2023\",\\n  month   = \"Oct\",\\n  url     = \"https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/\"\\n}\\nReferences#\\n[1] Madry et al. “Towards Deep Learning Models Resistant to Adversarial Attacks”. ICLR 2018.\\n[2] Ribeiro et al. “Semantically equivalent adversarial rules for debugging NLP models”. ACL 2018.\\n[3] Guo et al. “Gradient-based adversarial attacks against text transformers”. arXiv preprint arXiv:2104.13733 (2021).\\n[4] Ebrahimi et al. “HotFlip: White-Box Adversarial Examples for Text Classification”. ACL 2018.\\n[5] Wallace et al. “Universal Adversarial Triggers for Attacking and Analyzing NLP.” EMNLP-IJCNLP 2019. | code\\n[6] Mehrabi et al. “Robust Conversational Agents against Imperceptible Toxicity Triggers.” NAACL 2022.\\n[7] Zou et al. “Universal and Transferable Adversarial Attacks on Aligned Language Models.” arXiv preprint arXiv:2307.15043 (2023)\\n[8] Deng et al. “RLPrompt: Optimizing Discrete Text Prompts with Reinforcement Learning.” EMNLP 2022.\\n[9] Jin et al. “Is BERT Really Robust? A Strong Baseline for Natural Language Attack on Text Classification and Entailment.” AAAI 2020.\\n[10] Li et al. “BERT-Attack: Adversarial Attack Against BERT Using BERT.” EMNLP 2020.\\n[11] Morris et al. \"TextAttack: A Framework for Adversarial Attacks, Data Augmentation, and Adversarial Training in NLP.\" EMNLP 2020.\\n[12] Xu et al. “Bot-Adversarial Dialogue for Safe Conversational Agents.” NAACL 2021.\\n[13] Ziegler et al. “Adversarial training for high-stakes reliability.” NeurIPS 2022.\\n[14] Anthropic, “Red Teaming Language Models to Reduce Harms: Methods, Scaling Behaviors, and Lessons Learned.” arXiv preprint arXiv:2202.03286 (2022)\\n[15] Perez et al. 
“Red Teaming Language Models with Language Models.” arXiv preprint arXiv:2202.03286 (2022)\\n[16] Ganguli et al. “Red Teaming Language Models to Reduce Harms: Methods, Scaling Behaviors, and Lessons Learned.” arXiv preprint arXiv:2209.07858 (2022)\\n[17] Mehrabi et al. “FLIRT: Feedback Loop In-context Red Teaming.” arXiv preprint arXiv:2308.04265 (2023)\\n[18] Casper et al. “Explore, Establish, Exploit: Red Teaming Language Models from Scratch.” arXiv preprint arXiv:2306.09442 (2023)\\n[19] Xie et al. “Defending ChatGPT against Jailbreak Attack via Self-Reminder.” Research Square (2023)\\n[20] Jones et al. “Automatically Auditing Large Language Models via Discrete Optimization.” arXiv preprint arXiv:2303.04381 (2023)\\n[21] Greshake et al. “Compromising Real-World LLM-Integrated Applications with Indirect Prompt Injection.” arXiv preprint arXiv:2302.12173(2023)\\n[22] Jain et al. “Baseline Defenses for Adversarial Attacks Against Aligned Language Models.” arXiv preprint arXiv:2309.00614 (2023)\\n[23] Wei et al. “Jailbroken: How Does LLM Safety Training Fail?” arXiv preprint arXiv:2307.02483 (2023)\\n[24] Wei & Zou. “EDA: Easy data augmentation techniques for boosting performance on text classification tasks.”  EMNLP-IJCNLP 2019.\\n[25] www.jailbreakchat.com\\n[26] WitchBOT. 
“You can use GPT-4 to create prompt injections against GPT-4” Apr 2023.\\n\\n\\n\\nnlp\\nlanguage-model\\nsafety\\nadversarial attacks\\nrobustness\\nredteam\\n\\n\\n\\n« \\n\\nThinking about High-Quality Human Data\\n\\n\\n »\\n\\nLLM Powered Autonomous Agents\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n© 2024 Lil\\'Log\\n\\n        Powered by\\n        Hugo &\\n        PaperMod\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n' metadata={'source': 'https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/', 'title': \"Adversarial Attacks on LLMs | Lil'Log\", 'description': 'The use of large language models in the real world has strongly accelerated by the launch of ChatGPT. We (including my team at OpenAI, shoutout to them) have invested a lot of effort to build default safe behavior into the model during the alignment process (e.g. via RLHF). However, adversarial attacks or jailbreak prompts could potentially trigger the model to output something undesired.\\nA large body of ground work on adversarial attacks is on images, and differently it operates in the continuous, high-dimensional space.', 'language': 'en'}\n",
      "3\n",
      "4131\n",
      "Document 0:\n",
      "LLM Powered Autonomous Agents |\n",
      "Document 1:\n",
      "Lil'Log\n",
      "Document 2:\n",
      "Lil'Log\n",
      "Document 3:\n",
      "Posts\n",
      "Document 4:\n",
      "Archive\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Search\n",
      "Document 5:\n",
      "Tags\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "FAQ\n",
      "Document 6:\n",
      "emojisearch.app\n",
      "Document 7:\n",
      "LLM Powered\n",
      "Document 8:\n",
      "Autonomous Agents\n",
      "Document 9:\n",
      "Date: June 23, 2023  |\n",
      "Document 10:\n",
      "Estimated Reading Time: 31 min  |\n",
      "Document 11:\n",
      "Author: Lilian Weng\n",
      "Document 12:\n",
      "Table of Contents\n",
      "Document 13:\n",
      "Agent System Overview\n",
      "Document 14:\n",
      "Component One: Planning\n",
      "Document 15:\n",
      "Task Decomposition\n",
      "Document 16:\n",
      "Self-Reflection\n",
      "Document 17:\n",
      "Component Two: Memory\n",
      "Document 18:\n",
      "Types of Memory\n",
      "Document 19:\n",
      "Maximum Inner Product Search (MIPS)\n",
      "Document 20:\n",
      "Component Three: Tool Use\n",
      "Document 21:\n",
      "Case Studies\n",
      "\n",
      "Scientific Discovery Agent\n",
      "Document 22:\n",
      "Generative Agents Simulation\n",
      "Document 23:\n",
      "Proof-of-Concept Examples\n",
      "Document 24:\n",
      "Challenges\n",
      "\n",
      "Citation\n",
      "Document 25:\n",
      "References\n",
      "Document 26:\n",
      "Building agents with LLM (large language\n",
      "Document 27:\n",
      "model) as its core controller is a cool\n",
      "Document 28:\n",
      "concept. Several proof-of-concepts\n",
      "Document 29:\n",
      "demos, such as AutoGPT,\n",
      "Document 30:\n",
      "GPT-Engineer and BabyAGI,\n",
      "Document 31:\n",
      "serve as inspiring examples. The potentiality of\n",
      "Document 32:\n",
      "LLM extends beyond generating well-written copies,\n",
      "Document 33:\n",
      "stories, essays and programs; it can be framed\n",
      "Document 34:\n",
      "as a powerful general problem solver.\n",
      "Document 35:\n",
      "Agent System Overview#\n",
      "Document 36:\n",
      "In a LLM-powered autonomous agent\n",
      "Document 37:\n",
      "system, LLM functions as the\n",
      "Document 38:\n",
      "agent’s brain, complemented by several\n",
      "Document 39:\n",
      "key components:\n",
      "Document 40:\n",
      "Planning\n",
      "Document 41:\n",
      "Subgoal and decomposition: The agent breaks\n",
      "Document 42:\n",
      "down large tasks into smaller, manageable\n",
      "Document 43:\n",
      "subgoals, enabling efficient handling of complex\n",
      "Document 44:\n",
      "tasks.\n",
      "Document 45:\n",
      "Reflection and refinement: The agent can do\n",
      "Document 46:\n",
      "self-criticism and self-reflection over\n",
      "Document 47:\n",
      "past actions, learn from mistakes and refine them for\n",
      "Document 48:\n",
      "future steps, thereby improving the quality of final\n",
      "Document 49:\n",
      "results.\n",
      "Document 50:\n",
      "Memory\n",
      "Document 51:\n",
      "Short-term memory: I would consider all\n",
      "Document 52:\n",
      "the in-context learning (See Prompt Engineering)\n",
      "Document 53:\n",
      "as utilizing short-term memory of the model to\n",
      "Document 54:\n",
      "learn.\n",
      "Document 55:\n",
      "Long-term memory: This provides the agent\n",
      "Document 56:\n",
      "with the capability to retain and recall\n",
      "Document 57:\n",
      "(infinite) information over extended periods, often\n",
      "Document 58:\n",
      "by leveraging an external vector store and fast retrieval.\n",
      "Document 59:\n",
      "Tool use\n",
      "Document 60:\n",
      "The agent learns to call external APIs for extra\n",
      "Document 61:\n",
      "information that is missing from the model weights (often\n",
      "Document 62:\n",
      "hard to change after pre-training), including current\n",
      "Document 63:\n",
      "information, code execution capability, access to proprietary information\n",
      "Document 64:\n",
      "sources and more.\n",
      "Document 65:\n",
      "Fig. 1. Overview of a\n",
      "Document 66:\n",
      "LLM-powered autonomous agent system.\n",
      "Document 67:\n",
      "Component One: Planning#\n",
      "Document 68:\n",
      "A complicated task usually involves many steps. An\n",
      "Document 69:\n",
      "agent needs to know what they are and plan\n",
      "Document 70:\n",
      "ahead.\n",
      "Document 71:\n",
      "Task Decomposition#\n",
      "Document 72:\n",
      "Chain of thought (CoT; Wei et\n",
      "Document 73:\n",
      "al. 2022) has become a standard prompting technique\n",
      "Document 74:\n",
      "for enhancing model performance on complex tasks. The model\n",
      "Document 75:\n",
      "is instructed to “think step by\n",
      "Document 76:\n",
      "step” to utilize more test-time computation\n",
      "Document 77:\n",
      "to decompose hard tasks into smaller and simpler\n",
      "Document 78:\n",
      "steps. CoT transforms big tasks into multiple manageable\n",
      "Document 79:\n",
      "tasks and shed lights into an interpretation of the\n",
      "Document 80:\n",
      "model’s thinking process.\n",
      "Document 81:\n",
      "Tree of Thoughts (Yao et al.\n",
      "Document 82:\n",
      "2023) extends CoT by exploring multiple reasoning\n",
      "Document 83:\n",
      "possibilities at each step. It first decomposes the\n",
      "Document 84:\n",
      "problem into multiple thought steps and generates multiple thoughts per\n",
      "Document 85:\n",
      "step, creating a tree structure. The search process\n",
      "Document 86:\n",
      "can be BFS (breadth-first\n",
      "Document 87:\n",
      "search) or DFS (depth-first\n",
      "Document 88:\n",
      "search) with each state evaluated by a classifier\n",
      "Document 89:\n",
      "(via a prompt) or majority vote.\n",
      "Document 90:\n",
      "Task decomposition can be done (1)\n",
      "Document 91:\n",
      "by LLM with simple prompting like \"Steps\n",
      "Document 92:\n",
      "for XYZ.\\n1.\", \"What\n",
      "Document 93:\n",
      "are the subgoals for achieving XYZ?\",\n",
      "Document 94:\n",
      "(2) by using task-specific instructions;\n",
      "Document 95:\n",
      "e.g. \"Write a story outline.\"\n",
      "Document 96:\n",
      "for writing a novel, or (3) with\n",
      "Document 97:\n",
      "human inputs.\n",
      "Document 98:\n",
      "Another quite distinct approach, LLM+P\n",
      "Document 99:\n",
      "(Liu et al. 2023), involves\n",
      "Document 100:\n",
      "relying on an external classical planner to do\n",
      "Document 101:\n",
      "long-horizon planning. This approach utilizes the\n",
      "Document 102:\n",
      "Planning Domain Definition Language (PDDL) as an\n",
      "Document 103:\n",
      "intermediate interface to describe the planning problem. In this\n",
      "Document 104:\n",
      "process, LLM (1) translates the problem\n",
      "Document 105:\n",
      "into “Problem PDDL”, then\n",
      "Document 106:\n",
      "(2) requests a classical planner to generate a\n",
      "Document 107:\n",
      "PDDL plan based on an existing “Domain\n",
      "Document 108:\n",
      "PDDL”, and finally (3)\n",
      "Document 109:\n",
      "translates the PDDL plan back into natural language.\n",
      "Document 110:\n",
      "Essentially, the planning step is outsourced to an\n",
      "Document 111:\n",
      "external tool, assuming the availability of domain-specific\n",
      "Document 112:\n",
      "PDDL and a suitable planner which is common in\n",
      "Document 113:\n",
      "certain robotic setups but not in many other domains.\n",
      "Document 114:\n",
      "Self-Reflection#\n",
      "Document 115:\n",
      "Self-reflection is a vital aspect that\n",
      "Document 116:\n",
      "allows autonomous agents to improve iteratively by refining past\n",
      "Document 117:\n",
      "action decisions and correcting previous mistakes. It plays a\n",
      "Document 118:\n",
      "crucial role in real-world tasks where trial and\n",
      "Document 119:\n",
      "error are inevitable.\n",
      "Document 120:\n",
      "ReAct (Yao et al.\n",
      "Document 121:\n",
      "2023) integrates reasoning and acting within LLM\n",
      "Document 122:\n",
      "by extending the action space to be a combination of\n",
      "Document 123:\n",
      "task-specific discrete actions and the language space.\n",
      "Document 124:\n",
      "The former enables LLM to interact with the environment\n",
      "Document 125:\n",
      "(e.g. use Wikipedia search API),\n",
      "Document 126:\n",
      "while the latter prompting LLM to generate reasoning traces\n",
      "Document 127:\n",
      "in natural language.\n",
      "Document 128:\n",
      "The ReAct prompt template incorporates explicit steps for\n",
      "Document 129:\n",
      "LLM to think, roughly formatted as:\n",
      "Document 130:\n",
      "Thought: ...\n",
      "Action: ...\n",
      "Document 131:\n",
      "Observation: ...\n",
      "Document 132:\n",
      "... (Repeated many times)\n",
      "Document 133:\n",
      "Fig. 2.  Examples of reasoning\n",
      "Document 134:\n",
      "trajectories for knowledge-intensive tasks\n",
      "Document 135:\n",
      "(e.g. HotpotQA,\n",
      "Document 136:\n",
      "FEVER) and decision-making tasks\n",
      "Document 137:\n",
      "(e.g. AlfWorld Env,\n",
      "Document 138:\n",
      "WebShop). (Image source: Yao et\n",
      "Document 139:\n",
      "al. 2023).\n",
      "Document 140:\n",
      "In both experiments on knowledge-intensive tasks and\n",
      "Document 141:\n",
      "decision-making tasks, ReAct works better than\n",
      "Document 142:\n",
      "the Act-only baseline where Thought: … step\n",
      "Document 143:\n",
      "is removed.\n",
      "Document 144:\n",
      "Reflexion (Shinn & Labash\n",
      "Document 145:\n",
      "2023) is a framework to equips agents\n",
      "Document 146:\n",
      "with dynamic memory and self-reflection capabilities to\n",
      "Document 147:\n",
      "improve reasoning skills. Reflexion has a standard RL\n",
      "Document 148:\n",
      "setup, in which the reward model provides a simple\n",
      "Document 149:\n",
      "binary reward and the action space follows the setup in\n",
      "Document 150:\n",
      "ReAct where the task-specific action space is\n",
      "Document 151:\n",
      "augmented with language to enable complex reasoning steps. After\n",
      "Document 152:\n",
      "each action $a_t$, the agent\n",
      "Document 153:\n",
      "computes a heuristic $h_t$\n",
      "Document 154:\n",
      "and optionally may decide to reset the environment to start\n",
      "Document 155:\n",
      "a new trial depending on the self-reflection\n",
      "Document 156:\n",
      "results.\n",
      "Document 157:\n",
      "Fig. 3. Illustration of the\n",
      "Document 158:\n",
      "Reflexion framework. (Image source: Shinn\n",
      "Document 159:\n",
      "& Labash, 2023)\n",
      "Document 160:\n",
      "The heuristic function determines when the trajectory is\n",
      "Document 161:\n",
      "inefficient or contains hallucination and should be stopped.\n",
      "Document 162:\n",
      "Inefficient planning refers to trajectories that take too\n",
      "Document 163:\n",
      "long without success. Hallucination is defined as\n",
      "Document 164:\n",
      "encountering a sequence of consecutive identical actions that lead to\n",
      "Document 165:\n",
      "the same observation in the environment.\n",
      "Document 166:\n",
      "Self-reflection is created by showing\n",
      "Document 167:\n",
      "two-shot examples to LLM and each example\n",
      "Document 168:\n",
      "is a pair of (failed trajectory, ideal reflection\n",
      "Document 169:\n",
      "for guiding future changes in the plan). Then reflections\n",
      "Document 170:\n",
      "are added into the agent’s working\n",
      "Document 171:\n",
      "memory, up to three, to be used as\n",
      "Document 172:\n",
      "context for querying LLM.\n",
      "Document 173:\n",
      "Fig. 4. Experiments on AlfWorld\n",
      "Document 174:\n",
      "Env and HotpotQA.\n",
      "Document 175:\n",
      "Hallucination is a more common failure than inefficient\n",
      "Document 176:\n",
      "planning in AlfWorld. (Image source:\n",
      "Document 177:\n",
      "Shinn & Labash, 2023)\n",
      "Document 178:\n",
      "Chain of Hindsight (CoH; Liu\n",
      "Document 179:\n",
      "et al. 2023) encourages the model to\n",
      "Document 180:\n",
      "improve on its own outputs by explicitly presenting it with\n",
      "Document 181:\n",
      "a sequence of past outputs, each annotated with\n",
      "Document 182:\n",
      "feedback. Human feedback data is a collection of\n",
      "Document 183:\n",
      "$D_h = \\{(x,\n",
      "Document 184:\n",
      "y_i , r_i ,\n",
      "Document 185:\n",
      "z_i)\\}_{i\n",
      "Document 186:\n",
      "=1}^n$,\n",
      "Document 187:\n",
      "where $x$ is the prompt, each\n",
      "Document 188:\n",
      "$y_i$ is a model completion,\n",
      "Document 189:\n",
      "$r_i$ is the human rating of\n",
      "Document 190:\n",
      "$y_i$, and\n",
      "Document 191:\n",
      "$z_i$ is the corresponding\n",
      "Document 192:\n",
      "human-provided hindsight feedback. Assume the feedback\n",
      "Document 193:\n",
      "tuples are ranked by reward,\n",
      "Document 194:\n",
      "$r_n \\geq\n",
      "Document 195:\n",
      "r_{n-1} \\geq\n",
      "Document 196:\n",
      "\\dots \\geq r_1$\n",
      "Document 197:\n",
      "The process is supervised fine-tuning where the\n",
      "Document 198:\n",
      "data is a sequence in the form of\n",
      "Document 199:\n",
      "$\\tau_h = (x,\n",
      "Document 200:\n",
      "z_i, y_i,\n",
      "Document 201:\n",
      "z_j, y_j,\n",
      "Document 202:\n",
      "\\dots, z_n,\n",
      "Document 203:\n",
      "y_n)$, where $\\leq i\n",
      "Document 204:\n",
      "\\leq j \\leq n$. The\n",
      "Document 205:\n",
      "model is finetuned to only predict\n",
      "Document 206:\n",
      "$y_n$ where conditioned on the sequence\n",
      "Document 207:\n",
      "prefix, such that the model can self-reflect\n",
      "Document 208:\n",
      "to produce better output based on the feedback sequence.\n",
      "Document 209:\n",
      "The model can optionally receive multiple rounds of instructions with\n",
      "Document 210:\n",
      "human annotators at test time.\n",
      "Document 211:\n",
      "To avoid overfitting, CoH adds a\n",
      "Document 212:\n",
      "regularization term to maximize the log-likelihood\n",
      "Document 213:\n",
      "of the pre-training dataset. To avoid\n",
      "Document 214:\n",
      "shortcutting and copying (because there are many common\n",
      "Document 215:\n",
      "words in feedback sequences), they randomly mask 0%\n",
      "Document 216:\n",
      "- 5% of past tokens during training.\n",
      "Document 217:\n",
      "The training dataset in their experiments is a combination\n",
      "Document 218:\n",
      "of WebGPT comparisons, summarization from human\n",
      "Document 219:\n",
      "feedback and human preference dataset.\n",
      "Document 220:\n",
      "Fig. 5. After fine-tuning\n",
      "Document 221:\n",
      "with CoH, the model can follow instructions to\n",
      "Document 222:\n",
      "produce outputs with incremental improvement in a sequence.\n",
      "Document 223:\n",
      "(Image source: Liu et al.\n",
      "Document 224:\n",
      "2023)\n",
      "Document 225:\n",
      "The idea of CoH is to present a\n",
      "Document 226:\n",
      "history of sequentially improved outputs  in context and\n",
      "Document 227:\n",
      "train the model to take on the trend to produce\n",
      "Document 228:\n",
      "better outputs. Algorithm Distillation (AD;\n",
      "Document 229:\n",
      "Laskin et al. 2023) applies\n",
      "Document 230:\n",
      "the same idea to cross-episode trajectories in\n",
      "Document 231:\n",
      "reinforcement learning tasks, where an algorithm is encapsulated\n",
      "Document 232:\n",
      "in a long history-conditioned policy. Considering\n",
      "Document 233:\n",
      "that an agent interacts with the environment many times and\n",
      "Document 234:\n",
      "in each episode the agent gets a little better,\n",
      "Document 235:\n",
      "AD concatenates this learning history and feeds that\n",
      "Document 236:\n",
      "into the model. Hence we should expect the next\n",
      "Document 237:\n",
      "predicted action to lead to better performance than previous\n",
      "Document 238:\n",
      "trials. The goal is to learn the process of\n",
      "Document 239:\n",
      "RL instead of training a task-specific policy\n",
      "Document 240:\n",
      "itself.\n",
      "Document 241:\n",
      "Fig. 6. Illustration of how\n",
      "Document 242:\n",
      "Algorithm Distillation (AD) works.\n",
      "Document 243:\n",
      "(Image source: Laskin et al.\n",
      "Document 244:\n",
      "2023).\n",
      "Document 245:\n",
      "The paper hypothesizes that any algorithm that generates\n",
      "Document 246:\n",
      "a set of learning histories can be distilled into a\n",
      "Document 247:\n",
      "neural network by performing behavioral cloning over actions. The\n",
      "Document 248:\n",
      "history data is generated by a set of source\n",
      "Document 249:\n",
      "policies, each trained for a specific task. At\n",
      "Document 250:\n",
      "the training stage, during each RL run, a\n",
      "Document 251:\n",
      "random task is sampled and a subsequence of\n",
      "Document 252:\n",
      "multi-episode history is used for training, such\n",
      "Document 253:\n",
      "that the learned policy is task-agnostic.\n",
      "Document 254:\n",
      "In reality, the model has limited context window\n",
      "Document 255:\n",
      "length, so episodes should be short enough to construct\n",
      "Document 256:\n",
      "multi-episode history. Multi-episodic\n",
      "Document 257:\n",
      "contexts of 2-4 episodes are necessary to learn\n",
      "Document 258:\n",
      "a near-optimal in-context RL\n",
      "Document 259:\n",
      "algorithm. The emergence of in-context RL requires\n",
      "Document 260:\n",
      "long enough context.\n",
      "Document 261:\n",
      "In comparison with three baselines, including ED\n",
      "Document 262:\n",
      "(expert distillation, behavior cloning with expert\n",
      "Document 263:\n",
      "trajectories instead of learning history), source policy\n",
      "Document 264:\n",
      "(used for generating trajectories for distillation by\n",
      "Document 265:\n",
      "UCB), RL^2 (Duan et\n",
      "Document 266:\n",
      "al. 2017; used as upper bound since it\n",
      "Document 267:\n",
      "needs online RL), AD demonstrates in-context RL\n",
      "Document 268:\n",
      "with performance getting close to RL^2 despite only\n",
      "Document 269:\n",
      "using offline RL and learns much faster than other\n",
      "Document 270:\n",
      "baselines. When conditioned on partial training history of\n",
      "Document 271:\n",
      "the source policy, AD also improves much faster than\n",
      "Document 272:\n",
      "ED baseline.\n",
      "Document 273:\n",
      "Fig. 7. Comparison of AD,\n",
      "Document 274:\n",
      "ED, source policy and RL^2 on environments\n",
      "Document 275:\n",
      "that require memory and exploration. Only binary reward is\n",
      "Document 276:\n",
      "assigned. The source policies are trained with\n",
      "Document 277:\n",
      "A3C for \"dark\" environments and\n",
      "Document 278:\n",
      "DQN for watermaze.(Image\n",
      "Document 279:\n",
      "source: Laskin et al.\n",
      "Document 280:\n",
      "2023)\n",
      "Document 281:\n",
      "Component Two: Memory#\n",
      "Document 282:\n",
      "(Big thank you to ChatGPT for\n",
      "Document 283:\n",
      "helping me draft this section. I’ve\n",
      "Document 284:\n",
      "learned a lot about the human brain and data structure\n",
      "Document 285:\n",
      "for fast MIPS in my conversations with\n",
      "Document 286:\n",
      "ChatGPT.)\n",
      "Document 287:\n",
      "Types of Memory#\n",
      "Document 288:\n",
      "Memory can be defined as the processes used to\n",
      "Document 289:\n",
      "acquire, store, retain, and later retrieve\n",
      "Document 290:\n",
      "information. There are several types of memory in human\n",
      "Document 291:\n",
      "brains.\n",
      "Document 292:\n",
      "Sensory Memory: This is the earliest\n",
      "Document 293:\n",
      "stage of memory, providing the ability to retain impressions\n",
      "Document 294:\n",
      "of sensory information (visual, auditory, etc)\n",
      "Document 295:\n",
      "after the original stimuli have ended. Sensory memory\n",
      "Document 296:\n",
      "typically only lasts for up to a few seconds.\n",
      "Document 297:\n",
      "Subcategories include iconic memory (visual),\n",
      "Document 298:\n",
      "echoic memory (auditory), and haptic\n",
      "Document 299:\n",
      "memory (touch).\n",
      "Document 300:\n",
      "Short-Term Memory (STM) or\n",
      "Document 301:\n",
      "Working Memory: It stores information that we are currently\n",
      "Document 302:\n",
      "aware of and needed to carry out complex cognitive tasks\n",
      "Document 303:\n",
      "such as learning and reasoning. Short-term memory\n",
      "Document 304:\n",
      "is believed to have the capacity of about 7 items\n",
      "Document 305:\n",
      "(Miller 1956) and lasts for 20-30\n",
      "Document 306:\n",
      "seconds.\n",
      "Document 307:\n",
      "Long-Term Memory (LTM):\n",
      "Document 308:\n",
      "Long-term memory can store information for a remarkably\n",
      "Document 309:\n",
      "long time, ranging from a few days to\n",
      "Document 310:\n",
      "decades, with an essentially unlimited storage capacity. There\n",
      "Document 311:\n",
      "are two subtypes of LTM:\n",
      "Document 312:\n",
      "Explicit / declarative memory: This\n",
      "Document 313:\n",
      "is memory of facts and events, and refers to\n",
      "Document 314:\n",
      "those memories that can be consciously recalled, including\n",
      "Document 315:\n",
      "episodic memory (events and experiences) and semantic\n",
      "Document 316:\n",
      "memory (facts and concepts).\n",
      "Document 317:\n",
      "Implicit / procedural memory: This type of\n",
      "Document 318:\n",
      "memory is unconscious and involves skills and routines that are\n",
      "Document 319:\n",
      "performed automatically, like riding a bike or typing on\n",
      "Document 320:\n",
      "a keyboard.\n",
      "Document 321:\n",
      "Fig. 8. Categorization of human\n",
      "Document 322:\n",
      "memory.\n",
      "Document 323:\n",
      "We can roughly consider the following mappings:\n",
      "Document 324:\n",
      "Sensory memory as learning embedding representations\n",
      "Document 325:\n",
      "for raw inputs, including text, image or other\n",
      "Document 326:\n",
      "modalities;\n",
      "Document 327:\n",
      "Short-term memory as in-context\n",
      "Document 328:\n",
      "learning. It is short and finite, as it\n",
      "Document 329:\n",
      "is restricted by the finite context window length of\n",
      "Document 330:\n",
      "Transformer.\n",
      "Document 331:\n",
      "Long-term memory as the external vector store\n",
      "Document 332:\n",
      "that the agent can attend to at query time,\n",
      "Document 333:\n",
      "accessible via fast retrieval.\n",
      "Document 334:\n",
      "Maximum Inner Product Search (MIPS)#\n",
      "Document 335:\n",
      "The external memory can alleviate the restriction of finite\n",
      "Document 336:\n",
      "attention span.  A standard practice is to save\n",
      "Document 337:\n",
      "the embedding representation of information into a vector store\n",
      "Document 338:\n",
      "database that can support fast maximum inner-product search\n",
      "Document 339:\n",
      "(MIPS). To optimize the retrieval speed,\n",
      "Document 340:\n",
      "the common choice is the approximate nearest neighbors\n",
      "Document 341:\n",
      "(ANN)​ algorithm to return approximately top k\n",
      "Document 342:\n",
      "nearest neighbors to trade off a little accuracy lost for\n",
      "Document 343:\n",
      "a huge speedup.\n",
      "Document 344:\n",
      "A couple common choices of ANN algorithms for fast\n",
      "Document 345:\n",
      "MIPS:\n",
      "Document 346:\n",
      "LSH (Locality-Sensitive\n",
      "Document 347:\n",
      "Hashing): It introduces a hashing function such that\n",
      "Document 348:\n",
      "similar input items are mapped to the same buckets with\n",
      "Document 349:\n",
      "high probability, where the number of buckets is much\n",
      "Document 350:\n",
      "smaller than the number of inputs.\n",
      "Document 351:\n",
      "ANNOY (Approximate Nearest\n",
      "Document 352:\n",
      "Neighbors Oh Yeah): The core data structure are\n",
      "Document 353:\n",
      "random projection trees, a set of binary trees where\n",
      "Document 354:\n",
      "each non-leaf node represents a hyperplane splitting\n",
      "Document 355:\n",
      "the input space into half and each leaf stores one\n",
      "Document 356:\n",
      "data point. Trees are built independently and at\n",
      "Document 357:\n",
      "random, so to some extent, it mimics\n",
      "Document 358:\n",
      "a hashing function. ANNOY search happens in\n",
      "Document 359:\n",
      "all the trees to iteratively search through the half\n",
      "Document 360:\n",
      "that is closest to the query and then aggregates\n",
      "Document 361:\n",
      "the results. The idea is quite related to KD\n",
      "Document 362:\n",
      "tree but a lot more scalable.\n",
      "Document 363:\n",
      "HNSW (Hierarchical\n",
      "Document 364:\n",
      "Navigable Small World): It is inspired by\n",
      "Document 365:\n",
      "the idea of small world networks where most nodes can\n",
      "Document 366:\n",
      "be reached by any other nodes within a small number\n",
      "Document 367:\n",
      "of steps; e.g. “six\n",
      "Document 368:\n",
      "degrees of separation” feature of social networks.\n",
      "Document 369:\n",
      "HNSW builds hierarchical layers of these\n",
      "Document 370:\n",
      "small-world graphs, where the bottom layers contain\n",
      "Document 371:\n",
      "the actual data points. The layers in the middle\n",
      "Document 372:\n",
      "create shortcuts to speed up search. When performing a\n",
      "Document 373:\n",
      "search, HNSW starts from a random node\n",
      "Document 374:\n",
      "in the top layer and navigates towards the\n",
      "Document 375:\n",
      "target. When it can’t get any\n",
      "Document 376:\n",
      "closer, it moves down to the next layer,\n",
      "Document 377:\n",
      "until it reaches the bottom layer. Each move in\n",
      "Document 378:\n",
      "the upper layers can potentially cover a large distance in\n",
      "Document 379:\n",
      "the data space, and each move in the lower\n",
      "Document 380:\n",
      "layers refines the search quality.\n",
      "Document 381:\n",
      "FAISS (Facebook AI Similarity Search):\n",
      "Document 382:\n",
      "It operates on the assumption that in high dimensional\n",
      "Document 383:\n",
      "space, distances between nodes follow a Gaussian distribution\n",
      "Document 384:\n",
      "and thus there should exist clustering of data\n",
      "Document 385:\n",
      "points. FAISS applies vector quantization by\n",
      "Document 386:\n",
      "partitioning the vector space into clusters and then refining\n",
      "Document 387:\n",
      "the quantization within clusters. Search first looks for\n",
      "Document 388:\n",
      "cluster candidates with coarse quantization and then further looks\n",
      "Document 389:\n",
      "into each cluster with finer quantization.\n",
      "Document 390:\n",
      "ScaNN (Scalable Nearest\n",
      "Document 391:\n",
      "Neighbors): The main innovation in ScaNN\n",
      "Document 392:\n",
      "is anisotropic vector quantization. It\n",
      "Document 393:\n",
      "quantizes a data point $x_i$\n",
      "Document 394:\n",
      "to $\\tilde{x}_i$\n",
      "Document 395:\n",
      "such that the inner product $\\langle q,\n",
      "Document 396:\n",
      "x_i \\rangle$ is as similar\n",
      "Document 397:\n",
      "to the original distance of $\\angle q,\n",
      "Document 398:\n",
      "\\tilde{x}_i$ as\n",
      "Document 399:\n",
      "possible, instead of picking the closet quantization\n",
      "Document 400:\n",
      "centroid points.\n",
      "Document 401:\n",
      "Fig. 9. Comparison of MIPS\n",
      "Document 402:\n",
      "algorithms, measured in recall@10. (Image\n",
      "Document 403:\n",
      "source: Google Blog, 2020)\n",
      "Document 404:\n",
      "Check more MIPS algorithms and performance comparison in\n",
      "Document 405:\n",
      "ann-benchmarks.com.\n",
      "Document 406:\n",
      "Component Three: Tool Use#\n",
      "Document 407:\n",
      "Tool use is a remarkable and distinguishing characteristic of\n",
      "Document 408:\n",
      "human beings. We create, modify and utilize external\n",
      "Document 409:\n",
      "objects to do things that go beyond our physical and\n",
      "Document 410:\n",
      "cognitive limits. Equipping LLMs with external tools\n",
      "Document 411:\n",
      "can significantly extend the model capabilities.\n",
      "Document 412:\n",
      "Fig. 10. A picture of a sea\n",
      "Document 413:\n",
      "otter using rock to crack open a\n",
      "Document 414:\n",
      "seashell, while floating in the water. While\n",
      "Document 415:\n",
      "some other animals can use tools, the complexity is\n",
      "Document 416:\n",
      "not comparable with humans. (Image source: Animals\n",
      "Document 417:\n",
      "using tools)\n",
      "Document 418:\n",
      "MRKL (Karpas et\n",
      "Document 419:\n",
      "al. 2022), short for “Modular\n",
      "Document 420:\n",
      "Reasoning, Knowledge and Language”, is\n",
      "Document 421:\n",
      "a neuro-symbolic architecture for autonomous\n",
      "Document 422:\n",
      "agents. A MRKL system is proposed to\n",
      "Document 423:\n",
      "contain a collection of “expert”\n",
      "Document 424:\n",
      "modules and the general-purpose LLM works as\n",
      "Document 425:\n",
      "a router to route inquiries to the best suitable expert\n",
      "Document 426:\n",
      "module. These modules can be neural\n",
      "Document 427:\n",
      "(e.g. deep learning models) or\n",
      "Document 428:\n",
      "symbolic (e.g. math calculator, currency\n",
      "Document 429:\n",
      "converter, weather API).\n",
      "Document 430:\n",
      "They did an experiment on fine-tuning\n",
      "Document 431:\n",
      "LLM to call a calculator, using arithmetic as\n",
      "Document 432:\n",
      "a test case. Their experiments showed that it was\n",
      "Document 433:\n",
      "harder to solve verbal math problems than explicitly stated math\n",
      "Document 434:\n",
      "problems because LLMs (7B\n",
      "Document 435:\n",
      "Jurassic1-large model) failed to extract the\n",
      "Document 436:\n",
      "right arguments for the basic arithmetic reliably. The results\n",
      "Document 437:\n",
      "highlight when the external symbolic tools can work reliably,\n",
      "Document 438:\n",
      "knowing when to and how to use the tools are\n",
      "Document 439:\n",
      "crucial, determined by the LLM capability.\n",
      "Document 440:\n",
      "Both TALM (Tool Augmented Language\n",
      "Document 441:\n",
      "Models; Parisi et al. 2022) and\n",
      "Document 442:\n",
      "Toolformer (Schick et al.\n",
      "Document 443:\n",
      "2023) fine-tune a LM to\n",
      "Document 444:\n",
      "learn to use external tool APIs. The dataset is\n",
      "Document 445:\n",
      "expanded based on whether a newly added API call annotation\n",
      "Document 446:\n",
      "can improve the quality of model outputs. See more\n",
      "Document 447:\n",
      "details in the “External APIs” section\n",
      "Document 448:\n",
      "of Prompt Engineering.\n",
      "Document 449:\n",
      "ChatGPT Plugins and OpenAI API\n",
      "Document 450:\n",
      "function calling are good examples of LLMs augmented\n",
      "Document 451:\n",
      "with tool use capability working in practice. The collection\n",
      "Document 452:\n",
      "of tool APIs can be provided by other developers\n",
      "Document 453:\n",
      "(as in Plugins) or self-defined\n",
      "Document 454:\n",
      "(as in function calls).\n",
      "Document 455:\n",
      "HuggingGPT (Shen et\n",
      "Document 456:\n",
      "al. 2023) is a framework to use\n",
      "Document 457:\n",
      "ChatGPT as the task planner to select models\n",
      "Document 458:\n",
      "available in HuggingFace platform according to the model\n",
      "Document 459:\n",
      "descriptions and summarize the response based on the execution\n",
      "Document 460:\n",
      "results.\n",
      "Document 461:\n",
      "Fig. 11. Illustration of how\n",
      "Document 462:\n",
      "HuggingGPT works. (Image source:\n",
      "Document 463:\n",
      "Shen et al. 2023)\n",
      "Document 464:\n",
      "The system comprises of 4 stages:\n",
      "Document 465:\n",
      "(1) Task planning: LLM works\n",
      "Document 466:\n",
      "as the brain and parses the user requests into\n",
      "Document 467:\n",
      "multiple tasks. There are four attributes associated with each\n",
      "Document 468:\n",
      "task: task type, ID, dependencies, and\n",
      "Document 469:\n",
      "arguments. They use few-shot examples to guide\n",
      "Document 470:\n",
      "LLM to do task parsing and planning.\n",
      "Document 471:\n",
      "Instruction:\n",
      "Document 472:\n",
      "The AI assistant can parse user input to several\n",
      "Document 473:\n",
      "tasks: [{\"task\": task,\n",
      "Document 474:\n",
      "\"id\", task_id, \"dep\":\n",
      "Document 475:\n",
      "dependency_task_ids, \"args\":\n",
      "Document 476:\n",
      "{\"text\": text, \"image\": URL,\n",
      "Document 477:\n",
      "\"audio\": URL, \"video\":\n",
      "Document 478:\n",
      "URL}}]. The \"dep\" field denotes the\n",
      "Document 479:\n",
      "id of the previous task which generates a new resource\n",
      "Document 480:\n",
      "that the current task relies on. A special tag\n",
      "Document 481:\n",
      "\"-task_id\" refers to the generated text\n",
      "Document 482:\n",
      "image, audio and video in the dependency task with\n",
      "Document 483:\n",
      "id as task_id. The task MUST be\n",
      "Document 484:\n",
      "selected from the following options: {{ Available Task List\n",
      "Document 485:\n",
      "}}. There is a logical relationship between tasks,\n",
      "Document 486:\n",
      "please note their order. If the user input\n",
      "Document 487:\n",
      "can't be parsed, you need to reply empty\n",
      "Document 488:\n",
      "JSON. Here are several cases for your reference:\n",
      "Document 489:\n",
      "{{ Demonstrations }}. The chat history is\n",
      "Document 490:\n",
      "recorded as {{ Chat History }}. From this chat\n",
      "Document 491:\n",
      "history, you can find the path of the\n",
      "Document 492:\n",
      "user-mentioned resources for your task planning.\n",
      "Document 493:\n",
      "(2) Model selection: LLM\n",
      "Document 494:\n",
      "distributes the tasks to expert models, where the\n",
      "Document 495:\n",
      "request is framed as a multiple-choice question.\n",
      "Document 496:\n",
      "LLM is presented with a list of models to\n",
      "Document 497:\n",
      "choose from. Due to the limited context length,\n",
      "Document 498:\n",
      "task type based filtration is needed.\n",
      "Document 499:\n",
      "Instruction:\n",
      "Document 500:\n",
      "Given the user request and the call command,\n",
      "Document 501:\n",
      "the AI assistant helps the user to select a suitable\n",
      "Document 502:\n",
      "model from a list of models to process the user\n",
      "Document 503:\n",
      "request. The AI assistant merely outputs the model id\n",
      "Document 504:\n",
      "of the most appropriate model. The output must be\n",
      "Document 505:\n",
      "in a strict JSON format: \"id\":\n",
      "Document 506:\n",
      "\"id\", \"reason\": \"your detail reason\n",
      "Document 507:\n",
      "for the choice\". We have a list of models\n",
      "Document 508:\n",
      "for you to choose from {{ Candidate Models }}.\n",
      "Document 509:\n",
      "Please select one model from the list.\n",
      "Document 510:\n",
      "(3) Task execution: Expert models execute\n",
      "Document 511:\n",
      "on the specific tasks and log results.\n",
      "Document 512:\n",
      "Instruction:\n",
      "Document 513:\n",
      "With the input and the inference results, the\n",
      "Document 514:\n",
      "AI assistant needs to describe the process and results.\n",
      "Document 515:\n",
      "The previous stages can be formed as - User\n",
      "Document 516:\n",
      "Input: {{ User Input }}, Task Planning:\n",
      "Document 517:\n",
      "{{ Tasks }}, Model Selection: {{ Model\n",
      "Document 518:\n",
      "Assignment }}, Task Execution: {{ Predictions\n",
      "Document 519:\n",
      "}}. You must first answer the user's request\n",
      "Document 520:\n",
      "in a straightforward manner. Then describe the task process\n",
      "Document 521:\n",
      "and show your analysis and model inference results to the\n",
      "Document 522:\n",
      "user in the first person. If inference results contain\n",
      "Document 523:\n",
      "a file path, must tell the user the complete\n",
      "Document 524:\n",
      "file path.\n",
      "Document 525:\n",
      "(4) Response generation: LLM receives\n",
      "Document 526:\n",
      "the execution results and provides summarized results to users.\n",
      "Document 527:\n",
      "To put HuggingGPT into real world\n",
      "Document 528:\n",
      "usage, a couple challenges need to solve:\n",
      "Document 529:\n",
      "(1) Efficiency improvement is needed as both\n",
      "Document 530:\n",
      "LLM inference rounds and interactions with other models slow\n",
      "Document 531:\n",
      "down the process; (2) It relies on\n",
      "Document 532:\n",
      "a long context window to communicate over complicated task\n",
      "Document 533:\n",
      "content; (3) Stability improvement of LLM\n",
      "Document 534:\n",
      "outputs and external model services.\n",
      "Document 535:\n",
      "API-Bank (Li et al.\n",
      "Document 536:\n",
      "2023) is a benchmark for evaluating the performance\n",
      "Document 537:\n",
      "of tool-augmented LLMs. It\n",
      "Document 538:\n",
      "contains 53 commonly used API tools, a complete\n",
      "Document 539:\n",
      "tool-augmented LLM workflow, and\n",
      "Document 540:\n",
      "264 annotated dialogues that involve 568 API\n",
      "Document 541:\n",
      "calls. The selection of APIs is quite diverse,\n",
      "Document 542:\n",
      "including search engines, calculator, calendar queries, smart\n",
      "Document 543:\n",
      "home control, schedule management, health data management,\n",
      "Document 544:\n",
      "account authentication workflow and more. Because there are a\n",
      "Document 545:\n",
      "large number of APIs, LLM first has access\n",
      "Document 546:\n",
      "to API search engine to find the right API to\n",
      "Document 547:\n",
      "call and then uses the corresponding documentation to make a\n",
      "Document 548:\n",
      "call.\n",
      "Document 549:\n",
      "Fig. 12. Pseudo code of how\n",
      "Document 550:\n",
      "LLM makes an API call in\n",
      "Document 551:\n",
      "API-Bank. (Image source: Li et\n",
      "Document 552:\n",
      "al. 2023)\n",
      "Document 553:\n",
      "In the API-Bank workflow, LLMs\n",
      "Document 554:\n",
      "need to make a couple of decisions and at each\n",
      "Document 555:\n",
      "step we can evaluate how accurate that decision is.\n",
      "Document 556:\n",
      "Decisions include:\n",
      "Document 557:\n",
      "Whether an API call is needed.\n",
      "Document 558:\n",
      "Identify the right API to call: if\n",
      "Document 559:\n",
      "not good enough, LLMs need to iteratively\n",
      "Document 560:\n",
      "modify the API inputs (e.g. deciding\n",
      "Document 561:\n",
      "search keywords for Search Engine API).\n",
      "Document 562:\n",
      "Response based on the API results: the model\n",
      "Document 563:\n",
      "can choose to refine and call again if results are\n",
      "Document 564:\n",
      "not satisfied.\n",
      "Document 565:\n",
      "This benchmark evaluates the agent’s tool\n",
      "Document 566:\n",
      "use capabilities at three levels:\n",
      "Document 567:\n",
      "Level-1 evaluates the ability to call the\n",
      "Document 568:\n",
      "API. Given an API’s description,\n",
      "Document 569:\n",
      "the model needs to determine whether to call a given\n",
      "Document 570:\n",
      "API, call it correctly, and respond properly to\n",
      "Document 571:\n",
      "API returns.\n",
      "Document 572:\n",
      "Level-2 examines the ability to retrieve the\n",
      "Document 573:\n",
      "API. The model needs to search for possible APIs\n",
      "Document 574:\n",
      "that may solve the user’s requirement and\n",
      "Document 575:\n",
      "learn how to use them by reading documentation.\n",
      "Document 576:\n",
      "Level-3 assesses the ability to plan\n",
      "Document 577:\n",
      "API beyond retrieve and call. Given unclear user requests\n",
      "Document 578:\n",
      "(e.g. schedule group meetings, book\n",
      "Document 579:\n",
      "flight/hotel/restaurant for a\n",
      "Document 580:\n",
      "trip), the model may have to conduct multiple API\n",
      "Document 581:\n",
      "calls to solve it.\n",
      "Document 582:\n",
      "Case Studies#\n",
      "Document 583:\n",
      "Scientific Discovery Agent#\n",
      "Document 584:\n",
      "ChemCrow (Bran et al.\n",
      "Document 585:\n",
      "2023) is a domain-specific example in\n",
      "Document 586:\n",
      "which LLM is augmented with 13 expert-designed\n",
      "Document 587:\n",
      "tools to accomplish tasks across organic synthesis, drug\n",
      "Document 588:\n",
      "discovery, and materials design. The workflow, implemented\n",
      "Document 589:\n",
      "in LangChain, reflects what was previously described in\n",
      "Document 590:\n",
      "the ReAct and MRKLs and combines\n",
      "Document 591:\n",
      "CoT reasoning with tools relevant to the tasks:\n",
      "Document 592:\n",
      "The LLM is provided with a list of\n",
      "Document 593:\n",
      "tool names, descriptions of their utility, and details\n",
      "Document 594:\n",
      "about the expected input/output.\n",
      "Document 595:\n",
      "It is then instructed to answer a\n",
      "Document 596:\n",
      "user-given prompt using the tools provided when\n",
      "Document 597:\n",
      "necessary. The instruction suggests the model to follow the\n",
      "Document 598:\n",
      "ReAct format - Thought, Action, Action\n",
      "Document 599:\n",
      "Input, Observation.\n",
      "Document 600:\n",
      "One interesting observation is that while the\n",
      "Document 601:\n",
      "LLM-based evaluation concluded that\n",
      "Document 602:\n",
      "GPT-4 and ChemCrow perform nearly\n",
      "Document 603:\n",
      "equivalently, human evaluations with experts oriented towards the\n",
      "Document 604:\n",
      "completion and chemical correctness of the solutions showed that\n",
      "Document 605:\n",
      "ChemCrow outperforms GPT-4 by\n",
      "Document 606:\n",
      "a large margin. This indicates a potential problem with\n",
      "Document 607:\n",
      "using LLM to evaluate its own performance on domains\n",
      "Document 608:\n",
      "that requires deep expertise. The lack of expertise may\n",
      "Document 609:\n",
      "cause LLMs not knowing its flaws and thus cannot\n",
      "Document 610:\n",
      "well judge the correctness of task results.\n",
      "Document 611:\n",
      "Boiko et al. (2023)\n",
      "Document 612:\n",
      "also looked into LLM-empowered agents for\n",
      "Document 613:\n",
      "scientific discovery, to handle autonomous design, planning,\n",
      "Document 614:\n",
      "and performance of complex scientific experiments. This agent can\n",
      "Document 615:\n",
      "use tools to browse the Internet, read documentation,\n",
      "Document 616:\n",
      "execute code, call robotics experimentation APIs and leverage other\n",
      "Document 617:\n",
      "LLMs.\n",
      "Document 618:\n",
      "For example, when requested to \"develop a\n",
      "Document 619:\n",
      "novel anticancer drug\", the model came up with\n",
      "Document 620:\n",
      "the following reasoning steps:\n",
      "Document 621:\n",
      "inquired about current trends in anticancer drug\n",
      "Document 622:\n",
      "discovery;\n",
      "Document 623:\n",
      "selected a target;\n",
      "Document 624:\n",
      "requested a scaffold targeting these compounds;\n",
      "Document 625:\n",
      "Once the compound was identified, the model attempted\n",
      "Document 626:\n",
      "its synthesis.\n",
      "Document 627:\n",
      "They also discussed the risks, especially with illicit\n",
      "Document 628:\n",
      "drugs and bioweapons. They developed a test\n",
      "Document 629:\n",
      "set containing a list of known chemical weapon agents and\n",
      "Document 630:\n",
      "asked the agent to synthesize them. 4 out\n",
      "Document 631:\n",
      "of 11 requests (36%) were accepted to obtain\n",
      "Document 632:\n",
      "a synthesis solution and the agent attempted to consult documentation\n",
      "Document 633:\n",
      "to execute the procedure. 7 out of 11 were\n",
      "Document 634:\n",
      "rejected and among these 7 rejected cases, 5 happened\n",
      "Document 635:\n",
      "after a Web search while 2 were rejected based on\n",
      "Document 636:\n",
      "prompt only.\n",
      "Document 637:\n",
      "Generative Agents Simulation#\n",
      "Document 638:\n",
      "Generative Agents (Park, et al.\n",
      "Document 639:\n",
      "2023) is super fun experiment where 25 virtual\n",
      "Document 640:\n",
      "characters, each controlled by a LLM-powered\n",
      "Document 641:\n",
      "agent, are living and interacting in a sandbox\n",
      "Document 642:\n",
      "environment, inspired by The Sims. Generative agents\n",
      "Document 643:\n",
      "create believable simulacra of human behavior for interactive\n",
      "Document 644:\n",
      "applications.\n",
      "Document 645:\n",
      "The design of generative agents combines LLM\n",
      "Document 646:\n",
      "with memory, planning and reflection mechanisms to enable agents\n",
      "Document 647:\n",
      "to behave conditioned on past experience, as well as\n",
      "Document 648:\n",
      "to interact with other agents.\n",
      "Document 649:\n",
      "Memory stream: is a long-term memory\n",
      "Document 650:\n",
      "module (external database) that records a comprehensive list\n",
      "Document 651:\n",
      "of agents’ experience in natural language.\n",
      "Document 652:\n",
      "Each element is an observation, an event directly\n",
      "Document 653:\n",
      "provided by the agent.\n",
      "Document 654:\n",
      "- Inter-agent communication can trigger new natural\n",
      "Document 655:\n",
      "language statements.\n",
      "Document 656:\n",
      "Retrieval model: surfaces the context to\n",
      "Document 657:\n",
      "inform the agent’s behavior, according to\n",
      "Document 658:\n",
      "relevance, recency and importance.\n",
      "Document 659:\n",
      "Recency: recent events have higher scores\n",
      "Document 660:\n",
      "Importance: distinguish mundane from core memories.\n",
      "Document 661:\n",
      "Ask LM directly.\n",
      "Document 662:\n",
      "Relevance: based on how related it\n",
      "Document 663:\n",
      "is to the current situation / query.\n",
      "Document 664:\n",
      "Reflection mechanism: synthesizes memories into higher\n",
      "Document 665:\n",
      "level inferences over time and guides the\n",
      "Document 666:\n",
      "agent’s future behavior. They are\n",
      "Document 667:\n",
      "higher-level summaries of past events (<-\n",
      "Document 668:\n",
      "note that this is a bit different from\n",
      "Document 669:\n",
      "self-reflection above)\n",
      "Document 670:\n",
      "Prompt LM with 100 most recent observations and\n",
      "Document 671:\n",
      "to generate 3 most salient high-level questions given\n",
      "Document 672:\n",
      "a set of observations/statements. Then ask\n",
      "Document 673:\n",
      "LM to answer those questions.\n",
      "Document 674:\n",
      "Planning & Reacting: translate the reflections\n",
      "Document 675:\n",
      "and the environment information into actions\n",
      "Document 676:\n",
      "Planning is essentially in order to optimize\n",
      "Document 677:\n",
      "believability at the moment vs in time.\n",
      "Document 678:\n",
      "Prompt template: {Intro of an\n",
      "Document 679:\n",
      "agent X}. Here is X's plan today in\n",
      "Document 680:\n",
      "broad strokes: 1)\n",
      "Document 681:\n",
      "Relationships between agents and observations of one agent\n",
      "Document 682:\n",
      "by another are all taken into consideration for planning and\n",
      "Document 683:\n",
      "reacting.\n",
      "Document 684:\n",
      "Environment information is present in a tree structure.\n",
      "Document 685:\n",
      "Fig. 13. The generative agent\n",
      "Document 686:\n",
      "architecture. (Image source: Park et al.\n",
      "Document 687:\n",
      "2023)\n",
      "Document 688:\n",
      "This fun simulation results in emergent social\n",
      "Document 689:\n",
      "behavior, such as information diffusion, relationship memory\n",
      "Document 690:\n",
      "(e.g. two agents continuing the conversation\n",
      "Document 691:\n",
      "topic) and coordination of social events\n",
      "Document 692:\n",
      "(e.g. host a party and invite\n",
      "Document 693:\n",
      "many others).\n",
      "Document 694:\n",
      "Proof-of-Concept Examples#\n",
      "Document 695:\n",
      "AutoGPT has drawn a lot of attention\n",
      "Document 696:\n",
      "into the possibility of setting up autonomous agents with\n",
      "Document 697:\n",
      "LLM as the main controller. It has quite\n",
      "Document 698:\n",
      "a lot of reliability issues given the natural language\n",
      "Document 699:\n",
      "interface, but nevertheless a cool\n",
      "Document 700:\n",
      "proof-of-concept demo. A lot of\n",
      "Document 701:\n",
      "code in AutoGPT is about format parsing.\n",
      "Document 702:\n",
      "Here is the system message used by\n",
      "Document 703:\n",
      "AutoGPT, where {{...}} are user\n",
      "Document 704:\n",
      "inputs:\n",
      "Document 705:\n",
      "You are {{ai-name}},\n",
      "Document 706:\n",
      "{{user-provided AI bot description}}.\n",
      "Document 707:\n",
      "Your decisions must always be made independently without seeking\n",
      "Document 708:\n",
      "user assistance. Play to your strengths as an\n",
      "Document 709:\n",
      "LLM and pursue simple strategies with no legal\n",
      "Document 710:\n",
      "complications.\n",
      "Document 711:\n",
      "GOALS:\n",
      "Document 712:\n",
      "1. {{user-provided goal 1}}\n",
      "Document 713:\n",
      "2. {{user-provided goal 2}}\n",
      "Document 714:\n",
      "3. ...\n",
      "4. ...\n",
      "Document 715:\n",
      "5. ...\n",
      "Document 716:\n",
      "Constraints:\n",
      "Document 717:\n",
      "1. ~4000 word limit for short term\n",
      "Document 718:\n",
      "memory. Your short term memory is short, so\n",
      "Document 719:\n",
      "immediately save important information to files.\n",
      "Document 720:\n",
      "2. If you are unsure how you previously\n",
      "Document 721:\n",
      "did something or want to recall past events, thinking\n",
      "Document 722:\n",
      "about similar events will help you remember.\n",
      "Document 723:\n",
      "3. No user assistance\n",
      "Document 724:\n",
      "4. Exclusively use the commands listed in\n",
      "Document 725:\n",
      "double quotes e.g. \"command name\"\n",
      "Document 726:\n",
      "5. Use subprocesses for commands that\n",
      "Document 727:\n",
      "will not terminate within a few minutes\n",
      "Document 728:\n",
      "Commands:\n",
      "Document 729:\n",
      "1. Google Search: \"google\",\n",
      "Document 730:\n",
      "args: \"input\": \"<search>\"\n",
      "Document 731:\n",
      "2. Browse Website:\n",
      "Document 732:\n",
      "\"browse_website\", args:\n",
      "Document 733:\n",
      "\"url\": \"<url>\", \"question\":\n",
      "Document 734:\n",
      "\"<what_yo\n",
      "Document 735:\n",
      "u_want_to_\n",
      "Document 736:\n",
      "find_on_we\n",
      "Document 737:\n",
      "bsite>\"\n",
      "Document 738:\n",
      "3. Start GPT Agent:\n",
      "Document 739:\n",
      "\"start_agent\", args: \"name\":\n",
      "Document 740:\n",
      "\"<name>\", \"task\":\n",
      "Document 741:\n",
      "\"<short_task_desc>\",\n",
      "Document 742:\n",
      "\"prompt\": \"<prompt>\"\n",
      "Document 743:\n",
      "4. Message GPT Agent:\n",
      "Document 744:\n",
      "\"message_agent\", args: \"key\":\n",
      "Document 745:\n",
      "\"<key>\", \"message\": \"<message>\"\n",
      "Document 746:\n",
      "5. List GPT Agents:\n",
      "Document 747:\n",
      "\"list_agents\", args:\n",
      "Document 748:\n",
      "6. Delete GPT Agent:\n",
      "Document 749:\n",
      "\"delete_agent\", args: \"key\":\n",
      "Document 750:\n",
      "\"<key>\"\n",
      "Document 751:\n",
      "7. Clone Repository:\n",
      "Document 752:\n",
      "\"clone_repository\", args:\n",
      "Document 753:\n",
      "\"repository_url\":\n",
      "Document 754:\n",
      "\"<url>\", \"clone_path\":\n",
      "Document 755:\n",
      "\"<directory>\"\n",
      "Document 756:\n",
      "8. Write to file:\n",
      "Document 757:\n",
      "\"write_to_file\", args:\n",
      "Document 758:\n",
      "\"file\": \"<file>\", \"text\":\n",
      "Document 759:\n",
      "\"<text>\"\n",
      "Document 760:\n",
      "9. Read file:\n",
      "Document 761:\n",
      "\"read_file\", args: \"file\":\n",
      "Document 762:\n",
      "\"<file>\"\n",
      "Document 763:\n",
      "10. Append to file:\n",
      "Document 764:\n",
      "\"append_to_file\", args:\n",
      "Document 765:\n",
      "\"file\": \"<file>\", \"text\":\n",
      "Document 766:\n",
      "\"<text>\"\n",
      "Document 767:\n",
      "11. Delete file:\n",
      "Document 768:\n",
      "\"delete_file\", args: \"file\":\n",
      "Document 769:\n",
      "\"<file>\"\n",
      "Document 770:\n",
      "12. Search Files:\n",
      "Document 771:\n",
      "\"search_files\", args: \"directory\":\n",
      "Document 772:\n",
      "\"<directory>\"\n",
      "Document 773:\n",
      "13. Analyze Code:\n",
      "Document 774:\n",
      "\"analyze_code\", args:\n",
      "Document 775:\n",
      "\"code\": \"<full_code_string>\"\n",
      "Document 776:\n",
      "14. Get Improved Code:\n",
      "Document 777:\n",
      "\"improve_code\", args:\n",
      "Document 778:\n",
      "\"suggestions\":\n",
      "Document 779:\n",
      "\"<list_of_suggestions>\",\n",
      "Document 780:\n",
      "\"code\": \"<full_code_string>\"\n",
      "Document 781:\n",
      "15. Write Tests:\n",
      "Document 782:\n",
      "\"write_tests\", args: \"code\":\n",
      "Document 783:\n",
      "\"<full_code_string>\",\n",
      "Document 784:\n",
      "\"focus\":\n",
      "Document 785:\n",
      "\"<list_of\n",
      "Document 786:\n",
      "_focus_are\n",
      "Document 787:\n",
      "as>\"\n",
      "Document 788:\n",
      "16. Execute Python File:\n",
      "Document 789:\n",
      "\"execute_python_file\", args:\n",
      "Document 790:\n",
      "\"file\": \"<file>\"\n",
      "Document 791:\n",
      "17. Generate Image:\n",
      "Document 792:\n",
      "\"generate_image\", args:\n",
      "Document 793:\n",
      "\"prompt\": \"<prompt>\"\n",
      "Document 794:\n",
      "18. Send Tweet:\n",
      "Document 795:\n",
      "\"send_tweet\", args:\n",
      "Document 796:\n",
      "\"text\": \"<text>\"\n",
      "Document 797:\n",
      "19. Do Nothing:\n",
      "Document 798:\n",
      "\"do_nothing\", args:\n",
      "Document 799:\n",
      "20. Task Complete (Shutdown):\n",
      "Document 800:\n",
      "\"task_complete\", args: \"reason\":\n",
      "Document 801:\n",
      "\"<reason>\"\n",
      "Document 802:\n",
      "Resources:\n",
      "Document 803:\n",
      "1. Internet access for searches and information\n",
      "Document 804:\n",
      "gathering.\n",
      "Document 805:\n",
      "2. Long Term memory management.\n",
      "Document 806:\n",
      "3. GPT-3.5 powered\n",
      "Document 807:\n",
      "Agents for delegation of simple tasks.\n",
      "Document 808:\n",
      "4. File output.\n",
      "Document 809:\n",
      "Performance Evaluation:\n",
      "Document 810:\n",
      "1. Continuously review and analyze your actions\n",
      "Document 811:\n",
      "to ensure you are performing to the best of your\n",
      "Document 812:\n",
      "abilities.\n",
      "Document 813:\n",
      "2. Constructively self-criticize\n",
      "Document 814:\n",
      "your big-picture behavior constantly.\n",
      "Document 815:\n",
      "3. Reflect on past decisions and strategies to\n",
      "Document 816:\n",
      "refine your approach.\n",
      "Document 817:\n",
      "4. Every command has a cost, so\n",
      "Document 818:\n",
      "be smart and efficient. Aim to complete tasks in\n",
      "Document 819:\n",
      "the least number of steps.\n",
      "Document 820:\n",
      "You should only respond in JSON format as described\n",
      "Document 821:\n",
      "below\n",
      "Document 822:\n",
      "Response Format:\n",
      "{\n",
      "Document 823:\n",
      "\"thoughts\": {\n",
      "Document 824:\n",
      "\"text\": \"thought\",\n",
      "Document 825:\n",
      "\"reasoning\": \"reasoning\",\n",
      "Document 826:\n",
      "\"plan\": \"- short bulleted\\n-\n",
      "Document 827:\n",
      "list that conveys\\n- long-term\n",
      "Document 828:\n",
      "plan\",\n",
      "Document 829:\n",
      "\"criticism\": \"constructive\n",
      "Document 830:\n",
      "self-criticism\",\n",
      "Document 831:\n",
      "\"speak\": \"thoughts summary to say to\n",
      "Document 832:\n",
      "user\"\n",
      "Document 833:\n",
      "},\n",
      "Document 834:\n",
      "\"command\": {\n",
      "Document 835:\n",
      "\"name\": \"command name\",\n",
      "Document 836:\n",
      "\"args\": {\n",
      "Document 837:\n",
      "\"arg name\": \"value\"\n",
      "Document 838:\n",
      "}\n",
      "Document 839:\n",
      "}\n",
      "}\n",
      "Document 840:\n",
      "Ensure the response can be parsed by Python\n",
      "Document 841:\n",
      "json.loads\n",
      "Document 842:\n",
      "GPT-Engineer is another project to\n",
      "Document 843:\n",
      "create a whole repository of code given a task specified\n",
      "Document 844:\n",
      "in natural language. The GPT-Engineer\n",
      "Document 845:\n",
      "is instructed to think over a list of smaller components\n",
      "Document 846:\n",
      "to build and ask for user input to clarify questions\n",
      "Document 847:\n",
      "as needed.\n",
      "Document 848:\n",
      "Here are a sample conversation for task clarification sent\n",
      "Document 849:\n",
      "to OpenAI ChatCompletion endpoint used by\n",
      "Document 850:\n",
      "GPT-Engineer. The user inputs are\n",
      "Document 851:\n",
      "wrapped in {{user input text}}.\n",
      "Document 852:\n",
      "[\n",
      "  {\n",
      "Document 853:\n",
      "\"role\": \"system\",\n",
      "Document 854:\n",
      "\"content\": \"You will\n",
      "Document 855:\n",
      "read instructions and not carry them out, only seek\n",
      "Document 856:\n",
      "to clarify them.\\nSpecifically you will first\n",
      "Document 857:\n",
      "summarise a list of super short bullets of areas\n",
      "Document 858:\n",
      "that need clarification.\\nThen you will pick\n",
      "Document 859:\n",
      "one clarifying question, and wait for an answer\n",
      "Document 860:\n",
      "from the user.\\n\"\n",
      "Document 861:\n",
      "},\n",
      "  {\n",
      "Document 862:\n",
      "\"role\": \"user\",\n",
      "Document 863:\n",
      "\"content\": \"We are\n",
      "Document 864:\n",
      "writing {{a Super Mario game in python.\n",
      "Document 865:\n",
      "MVC components split in separate files. Keyboard\n",
      "Document 866:\n",
      "control.}}\\n\"\n",
      "Document 867:\n",
      "},\n",
      "  {\n",
      "Document 868:\n",
      "\"role\":\n",
      "Document 869:\n",
      "\"assistant\",\n",
      "Document 870:\n",
      "\"content\": \"Summary of\n",
      "Document 871:\n",
      "areas that need clarification:\\n1. Specifics\n",
      "Document 872:\n",
      "of the Super Mario game (e.g.\n",
      "Document 873:\n",
      "level design, characters, gameplay\n",
      "Document 874:\n",
      "mechanics)\\n2. Details about the MVC\n",
      "Document 875:\n",
      "components (e.g. which components are in\n",
      "Document 876:\n",
      "each file)\\n3. Keyboard control implementation\n",
      "Document 877:\n",
      "(e.g. which keys to use,\n",
      "Document 878:\n",
      "how to handle input)\\n\\nClarifying\n",
      "Document 879:\n",
      "question:\\nCan you provide more details about the\n",
      "Document 880:\n",
      "Super Mario game, such as level design,\n",
      "Document 881:\n",
      "characters, and gameplay mechanics?\"\n",
      "Document 882:\n",
      "},\n",
      "  {\n",
      "Document 883:\n",
      "\"role\": \"user\",\n",
      "Document 884:\n",
      "\"content\": \"{{There\n",
      "Document 885:\n",
      "are 10 levels in total. The main character is\n",
      "Document 886:\n",
      "a plumber named Mario, who can walk and\n",
      "Document 887:\n",
      "jump. It is a classical platform game just like\n",
      "Document 888:\n",
      "Super Mario. The main character moves from left to\n",
      "Document 889:\n",
      "right, trying to get to the destination, where\n",
      "Document 890:\n",
      "there are many obstacles and attacks from enemies in the\n",
      "Document 891:\n",
      "process.}}\\n\\nIs anything else\n",
      "Document 892:\n",
      "unclear? If yes, only answer in the\n",
      "Document 893:\n",
      "form:\\n{remaining unclear areas} remaining\n",
      "Document 894:\n",
      "questions.\\n{Next question}\\nIf\n",
      "Document 895:\n",
      "everything is sufficiently clear, only answer \\\"Nothing more\n",
      "Document 896:\n",
      "to clarify.\\\".\"\n",
      "Document 897:\n",
      "},\n",
      "  {\n",
      "Document 898:\n",
      "\"role\":\n",
      "Document 899:\n",
      "\"assistant\",\n",
      "Document 900:\n",
      "\"content\": \"Remaining\n",
      "Document 901:\n",
      "unclear areas: 2 remaining questions.\\nCan\n",
      "Document 902:\n",
      "you provide more information about how the MVC components\n",
      "Document 903:\n",
      "are split into separate files?\"\n",
      "Document 904:\n",
      "},\n",
      "  {\n",
      "Document 905:\n",
      "\"role\": \"user\",\n",
      "Document 906:\n",
      "\"content\": \"{{Make\n",
      "Document 907:\n",
      "your own assumptions and state them explicitly before\n",
      "Document 908:\n",
      "starting}}\"\n",
      "Document 909:\n",
      "}\n",
      "]\n",
      "Document 910:\n",
      "Then after these clarification, the agent moved into\n",
      "Document 911:\n",
      "the code writing mode with a different system message.\n",
      "Document 912:\n",
      "System message:\n",
      "Document 913:\n",
      "You will get instructions for code to write.\n",
      "Document 914:\n",
      "You will write a very long answer. Make\n",
      "Document 915:\n",
      "sure that every detail of the architecture is, in\n",
      "Document 916:\n",
      "the end, implemented as code.\n",
      "Document 917:\n",
      "Make sure that every detail of the architecture\n",
      "Document 918:\n",
      "is, in the end, implemented as code.\n",
      "Document 919:\n",
      "Think step by step and reason yourself to the\n",
      "Document 920:\n",
      "right decisions to make sure we get it right.\n",
      "Document 921:\n",
      "You will first lay out the names of the\n",
      "Document 922:\n",
      "core classes, functions, methods that will be\n",
      "Document 923:\n",
      "necessary, as well as a quick comment on their\n",
      "Document 924:\n",
      "purpose.\n",
      "Document 925:\n",
      "Then you will output the content of each file\n",
      "Document 926:\n",
      "including ALL code.\n",
      "Document 927:\n",
      "Each file must strictly follow a markdown code\n",
      "Document 928:\n",
      "block format, where the following tokens must be replaced\n",
      "Document 929:\n",
      "such that\n",
      "Document 930:\n",
      "FILENAME is the lowercase file name\n",
      "Document 931:\n",
      "including the file extension,\n",
      "Document 932:\n",
      "LANG is the markup code block language for\n",
      "Document 933:\n",
      "the code’s language, and CODE is\n",
      "Document 934:\n",
      "the code:\n",
      "Document 935:\n",
      "FILENAME\n",
      "CODE\n",
      "Document 936:\n",
      "You will start with the\n",
      "Document 937:\n",
      "“entrypoint” file, then go\n",
      "Document 938:\n",
      "to the ones that are imported by that file,\n",
      "Document 939:\n",
      "and so on.\n",
      "Document 940:\n",
      "Please note that the code should be fully\n",
      "Document 941:\n",
      "functional. No placeholders.\n",
      "Document 942:\n",
      "Follow a language and framework appropriate best practice file\n",
      "Document 943:\n",
      "naming convention.\n",
      "Document 944:\n",
      "Make sure that files contain all imports, types\n",
      "Document 945:\n",
      "etc. Make sure that code in different files are\n",
      "Document 946:\n",
      "compatible with each other.\n",
      "Document 947:\n",
      "Ensure to implement all code, if you\n",
      "Document 948:\n",
      "are unsure, write a plausible implementation.\n",
      "Document 949:\n",
      "Include module dependency or package manager dependency definition\n",
      "Document 950:\n",
      "file.\n",
      "Document 951:\n",
      "Before you finish, double check that all parts\n",
      "Document 952:\n",
      "of the architecture is present in the files.\n",
      "Document 953:\n",
      "Useful to know:\n",
      "Document 954:\n",
      "You almost always put different classes in different\n",
      "Document 955:\n",
      "files.\n",
      "Document 956:\n",
      "For Python, you always create an appropriate\n",
      "Document 957:\n",
      "requirements.txt file.\n",
      "Document 958:\n",
      "For NodeJS, you always create an appropriate\n",
      "Document 959:\n",
      "package.json file.\n",
      "Document 960:\n",
      "You always add a comment briefly describing the purpose\n",
      "Document 961:\n",
      "of the function definition.\n",
      "Document 962:\n",
      "You try to add comments explaining very complex bits\n",
      "Document 963:\n",
      "of logic.\n",
      "Document 964:\n",
      "You always follow the best practices for the requested\n",
      "Document 965:\n",
      "languages in terms of describing the code written as a\n",
      "Document 966:\n",
      "defined\n",
      "Document 967:\n",
      "package/project.\n",
      "Document 968:\n",
      "Python toolbelt preferences:\n",
      "Document 969:\n",
      "pytest\n",
      "dataclasses\n",
      "Document 970:\n",
      "Conversatin samples:\n",
      "[\n",
      "Document 971:\n",
      "{\n",
      "Document 972:\n",
      "\"role\": \"system\",\n",
      "Document 973:\n",
      "\"content\": \"You will\n",
      "Document 974:\n",
      "get instructions for code to write.\\nYou\n",
      "Document 975:\n",
      "will write a very long answer. Make sure that\n",
      "Document 976:\n",
      "every detail of the architecture is, in the\n",
      "Document 977:\n",
      "end, implemented as code.\\nMake sure\n",
      "Document 978:\n",
      "that every detail of the architecture is, in the\n",
      "Document 979:\n",
      "end, implemented as\n",
      "Document 980:\n",
      "code.\\n\\nThink step by step\n",
      "Document 981:\n",
      "and reason yourself to the right decisions to make sure\n",
      "Document 982:\n",
      "we get it right.\\nYou will first\n",
      "Document 983:\n",
      "lay out the names of the core classes,\n",
      "Document 984:\n",
      "functions, methods that will be necessary, as well\n",
      "Document 985:\n",
      "as a quick comment on their\n",
      "Document 986:\n",
      "purpose.\\n\\nThen you will output\n",
      "Document 987:\n",
      "the content of each file including ALL\n",
      "Document 988:\n",
      "code.\\nEach file must strictly follow a\n",
      "Document 989:\n",
      "markdown code block format, where the following tokens\n",
      "Document 990:\n",
      "must be replaced such that\\nFILENAME\n",
      "Document 991:\n",
      "is the lowercase file name including the file\n",
      "Document 992:\n",
      "extension,\\nLANG is the markup code\n",
      "Document 993:\n",
      "block language for the code's language, and CODE\n",
      "Document 994:\n",
      "is the\n",
      "Document 995:\n",
      "code:\\n\\n\n",
      "Document 996:\n",
      "FILENAME\\n\n",
      "Document 997:\n",
      "```LANG\\nC\n",
      "Document 998:\n",
      "ODE\\n```\\n\n",
      "Document 999:\n",
      "\\nYou\n",
      "Document 1000:\n",
      "will start with the \\\"entrypoint\\\" file,\n",
      "Document 1001:\n",
      "then go to the ones that are imported by that\n",
      "Document 1002:\n",
      "file, and so on.\\nPlease note\n",
      "Document 1003:\n",
      "that the code should be fully functional. No\n",
      "Document 1004:\n",
      "placeholders.\\n\\nFollow a language\n",
      "Document 1005:\n",
      "and framework appropriate best practice file naming\n",
      "Document 1006:\n",
      "convention.\\nMake sure that files contain all\n",
      "Document 1007:\n",
      "imports, types etc. Make sure that code in\n",
      "Document 1008:\n",
      "different files are compatible with each\n",
      "Document 1009:\n",
      "other.\\nEnsure to implement all\n",
      "Document 1010:\n",
      "code, if you are unsure, write a plausible\n",
      "Document 1011:\n",
      "implementation.\\nInclude module dependency or package\n",
      "Document 1012:\n",
      "manager dependency definition file.\\nBefore you\n",
      "Document 1013:\n",
      "finish, double check that all parts of the architecture\n",
      "Document 1014:\n",
      "is present in the\n",
      "Document 1015:\n",
      "files.\\n\\nUseful to\n",
      "Document 1016:\n",
      "know:\\nYou almost always put different classes in\n",
      "Document 1017:\n",
      "different files.\\nFor Python, you always\n",
      "Document 1018:\n",
      "create an appropriate requirements.txt\n",
      "Document 1019:\n",
      "file.\\nFor NodeJS, you always\n",
      "Document 1020:\n",
      "create an appropriate package.json\n",
      "Document 1021:\n",
      "file.\\nYou always add a comment briefly\n",
      "Document 1022:\n",
      "describing the purpose of the function\n",
      "Document 1023:\n",
      "definition.\\nYou try to add comments explaining\n",
      "Document 1024:\n",
      "very complex bits of logic.\\nYou always\n",
      "Document 1025:\n",
      "follow the best practices for the requested languages in terms\n",
      "Document 1026:\n",
      "of describing the code written as a\n",
      "Document 1027:\n",
      "defined\\n\n",
      "Document 1028:\n",
      "package/pr\n",
      "Document 1029:\n",
      "oject.\\n\\n\n",
      "Document 1030:\n",
      "\\nPython\n",
      "Document 1031:\n",
      "toolbelt preferences:\\n-\n",
      "Document 1032:\n",
      "pytest\\n-\n",
      "Document 1033:\n",
      "dataclasses\\n\"\n",
      "Document 1034:\n",
      "},\n",
      "Document 1035:\n",
      "#  … same conversation as earlier, ended\n",
      "Document 1036:\n",
      "with \"Make your own assumptions and state them explicitly\n",
      "Document 1037:\n",
      "before starting\".\n",
      "Document 1038:\n",
      "{\n",
      "Document 1039:\n",
      "\"role\":\n",
      "Document 1040:\n",
      "\"assistant\",\n",
      "Document 1041:\n",
      "\"content\":\n",
      "Document 1042:\n",
      "\"Assumptions:\\n1. Model:\n",
      "Document 1043:\n",
      "The model will contain the game's data, such\n",
      "Document 1044:\n",
      "as level information, character states, and enemy\n",
      "Document 1045:\n",
      "positions.\\n2. View: The view\n",
      "Document 1046:\n",
      "will handle the game's visuals, including rendering the\n",
      "Document 1047:\n",
      "game objects, backgrounds, and updating the\n",
      "Document 1048:\n",
      "display.\\n3. Controller: The controller\n",
      "Document 1049:\n",
      "will manage user input, such as keyboard controls,\n",
      "Document 1050:\n",
      "and update the model\n",
      "Document 1051:\n",
      "accordingly.\\n\\nWith these assumptions in\n",
      "Document 1052:\n",
      "mind, is there any specific aspect of the keyboard\n",
      "Document 1053:\n",
      "control implementation you would like me to clarify?\"\n",
      "Document 1054:\n",
      "},\n",
      "  {\n",
      "Document 1055:\n",
      "\"role\": \"user\",\n",
      "Document 1056:\n",
      "\"content\": \"Please now\n",
      "Document 1057:\n",
      "remember the steps:\\n\\nThink step by\n",
      "Document 1058:\n",
      "step and reason yourself to the right decisions to make\n",
      "Document 1059:\n",
      "sure we get it right.\\nFirst lay\n",
      "Document 1060:\n",
      "out the names of the core classes, functions,\n",
      "Document 1061:\n",
      "methods that will be necessary, As well as a\n",
      "Document 1062:\n",
      "quick comment on their\n",
      "Document 1063:\n",
      "purpose.\\n\\nThen you will output\n",
      "Document 1064:\n",
      "the content of each file including ALL\n",
      "Document 1065:\n",
      "code.\\nEach file must strictly follow a\n",
      "Document 1066:\n",
      "markdown code block format, where the following tokens\n",
      "Document 1067:\n",
      "must be replaced such that\\nFILENAME\n",
      "Document 1068:\n",
      "is the lowercase file name including the file\n",
      "Document 1069:\n",
      "extension,\\nLANG is the markup code\n",
      "Document 1070:\n",
      "block language for the code's language, and CODE\n",
      "Document 1071:\n",
      "is the\n",
      "Document 1072:\n",
      "code:\\n\\n\n",
      "Document 1073:\n",
      "FILENAME\\n\n",
      "Document 1074:\n",
      "```LANG\\nC\n",
      "Document 1075:\n",
      "ODE\\n```\\n\n",
      "Document 1076:\n",
      "\\nPlease\n",
      "Document 1077:\n",
      "note that the code should be fully functional. No\n",
      "Document 1078:\n",
      "placeholders.\\n\\nYou will start\n",
      "Document 1079:\n",
      "with the \\\"entrypoint\\\" file, then go\n",
      "Document 1080:\n",
      "to the ones that are imported by that file,\n",
      "Document 1081:\n",
      "and so on.\\nFollow a language and\n",
      "Document 1082:\n",
      "framework appropriate best practice file naming\n",
      "Document 1083:\n",
      "convention.\\nMake sure that files contain all\n",
      "Document 1084:\n",
      "imports, types etc. The code should be fully\n",
      "Document 1085:\n",
      "functional. Make sure that code in different files are\n",
      "Document 1086:\n",
      "compatible with each other.\\nBefore you\n",
      "Document 1087:\n",
      "finish, double check that all parts of the architecture\n",
      "Document 1088:\n",
      "is present in the files.\\n\"\n",
      "Document 1089:\n",
      "}\n",
      "]\n",
      "Challenges#\n",
      "Document 1090:\n",
      "After going through key ideas and demos of building\n",
      "Document 1091:\n",
      "LLM-centered agents, I start to see\n",
      "Document 1092:\n",
      "a couple common limitations:\n",
      "Document 1093:\n",
      "Finite context length: The restricted context capacity\n",
      "Document 1094:\n",
      "limits the inclusion of historical information, detailed instructions,\n",
      "Document 1095:\n",
      "API call context, and responses. The design of\n",
      "Document 1096:\n",
      "the system has to work with this limited communication\n",
      "Document 1097:\n",
      "bandwidth, while mechanisms like self-reflection to\n",
      "Document 1098:\n",
      "learn from past mistakes would benefit a lot from long\n",
      "Document 1099:\n",
      "or infinite context windows. Although vector stores and retrieval\n",
      "Document 1100:\n",
      "can provide access to a larger knowledge pool, their\n",
      "Document 1101:\n",
      "representation power is not as powerful as full attention.\n",
      "Document 1102:\n",
      "Challenges in long-term planning and task\n",
      "Document 1103:\n",
      "decomposition: Planning over a lengthy history and effectively\n",
      "Document 1104:\n",
      "exploring the solution space remain challenging. LLMs struggle\n",
      "Document 1105:\n",
      "to adjust plans when faced with unexpected errors, making\n",
      "Document 1106:\n",
      "them less robust compared to humans who learn from trial\n",
      "Document 1107:\n",
      "and error.\n",
      "Document 1108:\n",
      "Reliability of natural language interface: Current agent\n",
      "Document 1109:\n",
      "system relies on natural language as an interface between\n",
      "Document 1110:\n",
      "LLMs and external components such as memory and\n",
      "Document 1111:\n",
      "tools. However, the reliability of model outputs is\n",
      "Document 1112:\n",
      "questionable, as LLMs may make formatting errors and\n",
      "Document 1113:\n",
      "occasionally exhibit rebellious behavior (e.g. refuse\n",
      "Document 1114:\n",
      "to follow an instruction). Consequently, much of the\n",
      "Document 1115:\n",
      "agent demo code focuses on parsing model output.\n",
      "Document 1116:\n",
      "Citation#\n",
      "Document 1117:\n",
      "Cited as:\n",
      "Document 1118:\n",
      "Weng, Lilian. (Jun\n",
      "Document 1119:\n",
      "2023). “LLM-powered\n",
      "Document 1120:\n",
      "Autonomous Agents”.\n",
      "Document 1121:\n",
      "Lil’Log.\n",
      "Document 1122:\n",
      "https://l\n",
      "Document 1123:\n",
      "ilianweng.\n",
      "Document 1124:\n",
      "github.io/\n",
      "Document 1125:\n",
      "posts/2023\n",
      "Document 1126:\n",
      "-06-23-age\n",
      "Document 1127:\n",
      "nt/.\n",
      "Document 1128:\n",
      "Or\n",
      "Document 1129:\n",
      "@article{\n",
      "Document 1130:\n",
      "weng2023ag\n",
      "Document 1131:\n",
      "ent,\n",
      "Document 1132:\n",
      "title   =\n",
      "Document 1133:\n",
      "\"LLM-powered Autonomous Agents\",\n",
      "Document 1134:\n",
      "author  = \"Weng,\n",
      "Document 1135:\n",
      "Lilian\",\n",
      "Document 1136:\n",
      "journal =\n",
      "Document 1137:\n",
      "\"lilianwe\n",
      "Document 1138:\n",
      "ng.github.\n",
      "Document 1139:\n",
      "io\",\n",
      "Document 1140:\n",
      "year    =\n",
      "Document 1141:\n",
      "\"2023\",\n",
      "Document 1142:\n",
      "month   = \"Jun\",\n",
      "Document 1143:\n",
      "url     =\n",
      "Document 1144:\n",
      "\"https://\n",
      "Document 1145:\n",
      "lilianweng\n",
      "Document 1146:\n",
      ".github.io\n",
      "Document 1147:\n",
      "/posts/202\n",
      "Document 1148:\n",
      "3-06-23-ag\n",
      "Document 1149:\n",
      "ent/\"\n",
      "Document 1150:\n",
      "}\n",
      "References#\n",
      "Document 1151:\n",
      "[1] Wei et al.\n",
      "Document 1152:\n",
      "“Chain of thought prompting elicits reasoning in\n",
      "Document 1153:\n",
      "large language models.” NeurIPS 2022\n",
      "Document 1154:\n",
      "[2] Yao et al.\n",
      "Document 1155:\n",
      "“Tree of Thoughts: Dliberate Problem\n",
      "Document 1156:\n",
      "Solving with Large Language Models.”\n",
      "Document 1157:\n",
      "arXiv preprint\n",
      "Document 1158:\n",
      "arXiv:2305.10601\n",
      "Document 1159:\n",
      "(2023).\n",
      "Document 1160:\n",
      "[3] Liu et al.\n",
      "Document 1161:\n",
      "“Chain of Hindsight Aligns Language\n",
      "Document 1162:\n",
      "Models with Feedback\n",
      "Document 1163:\n",
      "“ arXiv preprint\n",
      "Document 1164:\n",
      "arXiv:2302.02676\n",
      "Document 1165:\n",
      "(2023).\n",
      "Document 1166:\n",
      "[4] Liu et al.\n",
      "Document 1167:\n",
      "“LLM+P: Empowering\n",
      "Document 1168:\n",
      "Large Language Models with Optimal Planning\n",
      "Document 1169:\n",
      "Proficiency” arXiv preprint\n",
      "Document 1170:\n",
      "arXiv:2304.11477\n",
      "Document 1171:\n",
      "(2023).\n",
      "Document 1172:\n",
      "[5] Yao et al.\n",
      "Document 1173:\n",
      "“ReAct: Synergizing reasoning and\n",
      "Document 1174:\n",
      "acting in language models.” ICLR\n",
      "Document 1175:\n",
      "2023.\n",
      "Document 1176:\n",
      "[6] Google Blog.\n",
      "Document 1177:\n",
      "“Announcing ScaNN: Efficient\n",
      "Document 1178:\n",
      "Vector Similarity Search” July 28,\n",
      "Document 1179:\n",
      "2020.\n",
      "Document 1180:\n",
      "[7]\n",
      "Document 1181:\n",
      "https://c\n",
      "Document 1182:\n",
      "hat.openai\n",
      "Document 1183:\n",
      ".com/share\n",
      "Document 1184:\n",
      "/46ff149e-\n",
      "Document 1185:\n",
      "a4c7-4dd7-\n",
      "Document 1186:\n",
      "a800-fc4a6\n",
      "Document 1187:\n",
      "42ea389\n",
      "Document 1188:\n",
      "[8] Shinn & Labash.\n",
      "Document 1189:\n",
      "“Reflexion: an autonomous agent with\n",
      "Document 1190:\n",
      "dynamic memory and self-reflection”\n",
      "Document 1191:\n",
      "arXiv preprint\n",
      "Document 1192:\n",
      "arXiv:2303.11366\n",
      "Document 1193:\n",
      "(2023).\n",
      "Document 1194:\n",
      "[9] Laskin et al.\n",
      "Document 1195:\n",
      "“In-context Reinforcement Learning with\n",
      "Document 1196:\n",
      "Algorithm Distillation” ICLR\n",
      "Document 1197:\n",
      "2023.\n",
      "Document 1198:\n",
      "[10] Karpas et al.\n",
      "Document 1199:\n",
      "“MRKL Systems A modular,\n",
      "Document 1200:\n",
      "neuro-symbolic architecture that combines large language\n",
      "Document 1201:\n",
      "models, external knowledge sources and discrete\n",
      "Document 1202:\n",
      "reasoning.” arXiv preprint\n",
      "Document 1203:\n",
      "arXiv:2205.00445\n",
      "Document 1204:\n",
      "(2022).\n",
      "Document 1205:\n",
      "[11] Nakano et al.\n",
      "Document 1206:\n",
      "“Webgpt: Browser-assisted\n",
      "Document 1207:\n",
      "question-answering with human\n",
      "Document 1208:\n",
      "feedback.” arXiv preprint\n",
      "Document 1209:\n",
      "arXiv:2112.09332\n",
      "Document 1210:\n",
      "(2021).\n",
      "Document 1211:\n",
      "[12] Parisi et al.\n",
      "Document 1212:\n",
      "“TALM: Tool Augmented Language\n",
      "Document 1213:\n",
      "Models”\n",
      "Document 1214:\n",
      "[13] Schick et al.\n",
      "Document 1215:\n",
      "“Toolformer: Language Models Can Teach\n",
      "Document 1216:\n",
      "Themselves to Use Tools.”\n",
      "Document 1217:\n",
      "arXiv preprint\n",
      "Document 1218:\n",
      "arXiv:2302.04761\n",
      "Document 1219:\n",
      "(2023).\n",
      "Document 1220:\n",
      "[14] Weaviate Blog. Why\n",
      "Document 1221:\n",
      "is Vector Search so fast? Sep 13,\n",
      "Document 1222:\n",
      "2022.\n",
      "Document 1223:\n",
      "[15] Li et al.\n",
      "Document 1224:\n",
      "“API-Bank: A Benchmark for\n",
      "Document 1225:\n",
      "Tool-Augmented LLMs”\n",
      "Document 1226:\n",
      "arXiv preprint\n",
      "Document 1227:\n",
      "arXiv:2304.08244\n",
      "Document 1228:\n",
      "(2023).\n",
      "Document 1229:\n",
      "[16] Shen et al.\n",
      "Document 1230:\n",
      "“HuggingGPT: Solving AI\n",
      "Document 1231:\n",
      "Tasks with ChatGPT and its Friends in\n",
      "Document 1232:\n",
      "HuggingFace” arXiv preprint\n",
      "Document 1233:\n",
      "arXiv:2303.17580\n",
      "Document 1234:\n",
      "(2023).\n",
      "Document 1235:\n",
      "[17] Bran et al.\n",
      "Document 1236:\n",
      "“ChemCrow: Augmenting\n",
      "Document 1237:\n",
      "large-language models with chemistry tools.”\n",
      "Document 1238:\n",
      "arXiv preprint\n",
      "Document 1239:\n",
      "arXiv:2304.05376\n",
      "Document 1240:\n",
      "(2023).\n",
      "Document 1241:\n",
      "[18] Boiko et al.\n",
      "Document 1242:\n",
      "“Emergent autonomous scientific research capabilities of large\n",
      "Document 1243:\n",
      "language models.” arXiv preprint\n",
      "Document 1244:\n",
      "arXiv:2304.05332\n",
      "Document 1245:\n",
      "(2023).\n",
      "Document 1246:\n",
      "[19] Joon Sung Park, et\n",
      "Document 1247:\n",
      "al. “Generative Agents: Interactive\n",
      "Document 1248:\n",
      "Simulacra of Human Behavior.”\n",
      "Document 1249:\n",
      "arXiv preprint\n",
      "Document 1250:\n",
      "arXiv:2304.03442\n",
      "Document 1251:\n",
      "(2023).\n",
      "Document 1252:\n",
      "[20] AutoGPT.\n",
      "Document 1253:\n",
      "https://g\n",
      "Document 1254:\n",
      "ithub.com/\n",
      "Document 1255:\n",
      "Significan\n",
      "Document 1256:\n",
      "t-Gravitas\n",
      "Document 1257:\n",
      "/Auto-GPT\n",
      "Document 1258:\n",
      "[21] GPT-Engineer.\n",
      "Document 1259:\n",
      "https://g\n",
      "Document 1260:\n",
      "ithub.com/\n",
      "Document 1261:\n",
      "AntonOsika\n",
      "Document 1262:\n",
      "/gpt-engin\n",
      "Document 1263:\n",
      "eer\n",
      "Document 1264:\n",
      "nlp\n",
      "language-model\n",
      "agent\n",
      "Document 1265:\n",
      "steerability\n",
      "prompting\n",
      "Document 1266:\n",
      "«\n",
      "Document 1267:\n",
      "Adversarial Attacks on LLMs\n",
      "Document 1268:\n",
      "»\n",
      "\n",
      "Prompt Engineering\n",
      "Document 1269:\n",
      "© 2024 Lil'Log\n",
      "Document 1270:\n",
      "Powered by\n",
      "Document 1271:\n",
      "Hugo &\n",
      "Document 1272:\n",
      "PaperMod\n",
      "Document 1273:\n",
      "Prompt Engineering | Lil'Log\n",
      "Document 1274:\n",
      "Lil'Log\n",
      "Document 1275:\n",
      "Posts\n",
      "Document 1276:\n",
      "Archive\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Search\n",
      "Document 1277:\n",
      "Tags\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "FAQ\n",
      "Document 1278:\n",
      "emojisearch.app\n",
      "Document 1279:\n",
      "Prompt Engineering\n",
      "Document 1280:\n",
      "Date: March 15, 2023  |\n",
      "Document 1281:\n",
      "Estimated Reading Time: 21 min  |\n",
      "Document 1282:\n",
      "Author: Lilian Weng\n",
      "Document 1283:\n",
      "Table of Contents\n",
      "Document 1284:\n",
      "Basic Prompting\n",
      "\n",
      "Zero-Shot\n",
      "Document 1285:\n",
      "Few-shot\n",
      "Document 1286:\n",
      "Tips for Example Selection\n",
      "Document 1287:\n",
      "Tips for Example Ordering\n",
      "Document 1288:\n",
      "Instruction Prompting\n",
      "Document 1289:\n",
      "Self-Consistency Sampling\n",
      "Document 1290:\n",
      "Chain-of-Thought\n",
      "Document 1291:\n",
      "(CoT)\n",
      "Document 1292:\n",
      "Types of CoT prompts\n",
      "Document 1293:\n",
      "Tips and Extensions\n",
      "Document 1294:\n",
      "Automatic Prompt Design\n",
      "Document 1295:\n",
      "Augmented Language Models\n",
      "Document 1296:\n",
      "Retrieval\n",
      "\n",
      "Programming Language\n",
      "Document 1297:\n",
      "External APIs\n",
      "\n",
      "\n",
      "Citation\n",
      "Document 1298:\n",
      "Useful Resources\n",
      "\n",
      "References\n",
      "Document 1299:\n",
      "Prompt Engineering, also known as\n",
      "Document 1300:\n",
      "In-Context Prompting, refers to methods for\n",
      "Document 1301:\n",
      "how to communicate with LLM to steer its behavior\n",
      "Document 1302:\n",
      "for desired outcomes without updating the model weights. It\n",
      "Document 1303:\n",
      "is an empirical science and the effect of prompt engineering\n",
      "Document 1304:\n",
      "methods can vary a lot among models, thus requiring\n",
      "Document 1305:\n",
      "heavy experimentation and heuristics.\n",
      "Document 1306:\n",
      "This post only focuses on prompt engineering for\n",
      "Document 1307:\n",
      "autoregressive language models, so nothing with\n",
      "Document 1308:\n",
      "Cloze tests, image generation or multimodality\n",
      "Document 1309:\n",
      "models. At its core, the goal of prompt\n",
      "Document 1310:\n",
      "engineering is about alignment and model steerability. Check\n",
      "Document 1311:\n",
      "my previous post on controllable text generation.\n",
      "Document 1312:\n",
      "[My personal spicy take] In my\n",
      "Document 1313:\n",
      "opinion, some prompt engineering papers are not worthy 8\n",
      "Document 1314:\n",
      "pages long, since those tricks can be explained in\n",
      "Document 1315:\n",
      "one or a few sentences and the rest is all\n",
      "Document 1316:\n",
      "about benchmarking. An easy-to-use\n",
      "Document 1317:\n",
      "and shared benchmark infrastructure should be more beneficial to the\n",
      "Document 1318:\n",
      "community. Iterative prompting or external tool use would\n",
      "Document 1319:\n",
      "not be trivial to set up. Also\n",
      "Document 1320:\n",
      "non-trivial to align the whole research\n",
      "Document 1321:\n",
      "community to adopt it.\n",
      "Document 1322:\n",
      "Basic Prompting#\n",
      "Document 1323:\n",
      "Zero-shot and few-shot learning are\n",
      "Document 1324:\n",
      "two most basic approaches for prompting the model, pioneered\n",
      "Document 1325:\n",
      "by many LLM papers and commonly used for\n",
      "Document 1326:\n",
      "benchmarking LLM performance.\n",
      "Document 1327:\n",
      "Zero-Shot#\n",
      "Document 1328:\n",
      "Zero-shot learning is to simply feed the\n",
      "Document 1329:\n",
      "task text to the model and ask for results.\n",
      "Document 1330:\n",
      "(All the sentiment analysis examples are from\n",
      "Document 1331:\n",
      "SST-2)\n",
      "Document 1332:\n",
      "Text: i'll bet the video game is\n",
      "Document 1333:\n",
      "a lot more fun than the film.\n",
      "Document 1334:\n",
      "Sentiment:\n",
      "Few-shot#\n",
      "Document 1335:\n",
      "Few-shot learning presents a set of\n",
      "Document 1336:\n",
      "high-quality demonstrations, each consisting of both input\n",
      "Document 1337:\n",
      "and desired output, on the target task. As\n",
      "Document 1338:\n",
      "the model first sees good examples, it can better\n",
      "Document 1339:\n",
      "understand human intention and criteria for what kinds of answers\n",
      "Document 1340:\n",
      "are wanted. Therefore, few-shot learning often\n",
      "Document 1341:\n",
      "leads to better performance than zero-shot.\n",
      "Document 1342:\n",
      "However, it comes at the cost of more token\n",
      "Document 1343:\n",
      "consumption and may hit the context length limit when input\n",
      "Document 1344:\n",
      "and output text are long.\n",
      "Document 1345:\n",
      "Text: (lawrence bounces) all over\n",
      "Document 1346:\n",
      "the stage, dancing, running, sweating,\n",
      "Document 1347:\n",
      "mopping his face and generally displaying the wacky\n",
      "Document 1348:\n",
      "talent that brought him fame in the first place.\n",
      "Document 1349:\n",
      "Sentiment: positive\n",
      "Document 1350:\n",
      "Text: despite all evidence to the contrary,\n",
      "Document 1351:\n",
      "this clunker has somehow managed to pose as an\n",
      "Document 1352:\n",
      "actual feature movie, the kind that charges full admission\n",
      "Document 1353:\n",
      "and gets hyped on tv and purports to\n",
      "Document 1354:\n",
      "amuse small children and ostensible adults.\n",
      "Document 1355:\n",
      "Sentiment: negative\n",
      "Document 1356:\n",
      "Text: for the first time in years,\n",
      "Document 1357:\n",
      "de niro digs deep emotionally, perhaps because\n",
      "Document 1358:\n",
      "he's been stirred by the powerful work of his\n",
      "Document 1359:\n",
      "co-stars.\n",
      "Document 1360:\n",
      "Sentiment: positive\n",
      "Document 1361:\n",
      "Text: i'll bet the video game is\n",
      "Document 1362:\n",
      "a lot more fun than the film.\n",
      "Document 1363:\n",
      "Sentiment:\n",
      "Document 1364:\n",
      "Many studies looked into how to construct\n",
      "Document 1365:\n",
      "in-context examples to maximize the performance and observed\n",
      "Document 1366:\n",
      "that choice of prompt format, training examples, and\n",
      "Document 1367:\n",
      "the order of the examples can lead to dramatically different\n",
      "Document 1368:\n",
      "performance, from near random guess to near\n",
      "Document 1369:\n",
      "SoTA.\n",
      "Document 1370:\n",
      "Zhao et al. (2021)\n",
      "Document 1371:\n",
      "investigated the case of few-shot classification and proposed\n",
      "Document 1372:\n",
      "that several biases with LLM (they use\n",
      "Document 1373:\n",
      "GPT-3 in the experiments) contribute to\n",
      "Document 1374:\n",
      "such high variance: (1) Majority label bias\n",
      "Document 1375:\n",
      "exists if distribution of labels among the examples is\n",
      "Document 1376:\n",
      "unbalanced; (2) Recency bias refers\n",
      "Document 1377:\n",
      "to the tendency where the model may repeat the label\n",
      "Document 1378:\n",
      "at the end; (3) Common token bias\n",
      "Document 1379:\n",
      "indicates that LLM tends to produce common tokens more\n",
      "Document 1380:\n",
      "often than rare tokens. To conquer such bias,\n",
      "Document 1381:\n",
      "they proposed a method to calibrate the label probabilities\n",
      "Document 1382:\n",
      "output by the model to be uniform when the input\n",
      "Document 1383:\n",
      "string is N/A.\n",
      "Document 1384:\n",
      "Tips for Example Selection#\n",
      "Document 1385:\n",
      "Choose examples that are semantically similar to the\n",
      "Document 1386:\n",
      "test example using $k$-NN clustering\n",
      "Document 1387:\n",
      "in the embedding space (Liu et\n",
      "Document 1388:\n",
      "al., 2021)\n",
      "Document 1389:\n",
      "To select a diverse and representative set of\n",
      "Document 1390:\n",
      "examples, Su et al. (2022)\n",
      "Document 1391:\n",
      "proposed to use a graph-based approach:\n",
      "Document 1392:\n",
      "(1) First, construct a directed graph\n",
      "Document 1393:\n",
      "$G=(V, E)$ based on\n",
      "Document 1394:\n",
      "the embedding (e.g. by\n",
      "Document 1395:\n",
      "SBERT or other embedding models)\n",
      "Document 1396:\n",
      "cosine similarity between samples, where each node points\n",
      "Document 1397:\n",
      "to its $k$ nearest neighbors;\n",
      "Document 1398:\n",
      "(2) Start with a set of selected samples\n",
      "Document 1399:\n",
      "$\\mathcal\n",
      "Document 1400:\n",
      "{L}=\\empty\n",
      "Document 1401:\n",
      "set$\n",
      "Document 1402:\n",
      "and a set of remaining samples\n",
      "Document 1403:\n",
      "$\\mathcal{U}$. Each sample\n",
      "Document 1404:\n",
      "$u \\in\n",
      "Document 1405:\n",
      "\\mathcal{U}$ is scored by\n",
      "Document 1406:\n",
      "$$\n",
      "Document 1407:\n",
      "\\text{score}(u) =\n",
      "Document 1408:\n",
      "\\sum_{v \\in \\{v\n",
      "Document 1409:\n",
      "\\mid (u, v) \\in\n",
      "Document 1410:\n",
      "E, v\\in\n",
      "Document 1411:\n",
      "\\mathcal{U}\\}}\n",
      "Document 1412:\n",
      "s(v)\\quad\\text{where\n",
      "Document 1413:\n",
      "}s(v)=\\rh\n",
      "Document 1414:\n",
      "o^{-\n",
      "Document 1415:\n",
      "\\vert \\{\\ell \\in\n",
      "Document 1416:\n",
      "\\mathcal{L} \\vert\n",
      "Document 1417:\n",
      "(v, \\ell)\\in E\n",
      "Document 1418:\n",
      "\\}\\vert},\\quad\\rho >\n",
      "Document 1419:\n",
      "1\n",
      "Document 1420:\n",
      "$$ such that $s(v)$\n",
      "Document 1421:\n",
      "is low if many of\n",
      "Document 1422:\n",
      "$v$’s neighbors are selected and\n",
      "Document 1423:\n",
      "thus the scoring encourages to pick diverse samples.\n",
      "Document 1424:\n",
      "Rubin et al. (2022)\n",
      "Document 1425:\n",
      "proposed to train embeddings via contrastive learning\n",
      "Document 1426:\n",
      "specific to one training dataset for in-context learning\n",
      "Document 1427:\n",
      "sample selection.  Given each training pair\n",
      "Document 1428:\n",
      "$(x, y)$, the quality of one\n",
      "Document 1429:\n",
      "example $e_i$ (formatted\n",
      "Document 1430:\n",
      "input-output pair) can be measured by a\n",
      "Document 1431:\n",
      "conditioned probability assigned by LM:\n",
      "Document 1432:\n",
      "$\\text{sc\n",
      "Document 1433:\n",
      "ore}(e_i)\n",
      "Document 1434:\n",
      "= P_\\text{LM}(y\n",
      "Document 1435:\n",
      "\\mid e_i, x)$. We\n",
      "Document 1436:\n",
      "can identify other examples with top-$k$ and\n",
      "Document 1437:\n",
      "bottom-$k$ scores as positive and negative sets\n",
      "Document 1438:\n",
      "of candidates for every training pair and use that for\n",
      "Document 1439:\n",
      "contrastive learning.\n",
      "Document 1440:\n",
      "Some researchers tried Q-Learning to do sample\n",
      "Document 1441:\n",
      "selection. (Zhang et al. 2022)\n",
      "Document 1442:\n",
      "Motivated by uncertainty-based active learning,\n",
      "Document 1443:\n",
      "Diao et al. (2023) suggested\n",
      "Document 1444:\n",
      "to identify examples with high disagreement or entropy among multiple\n",
      "Document 1445:\n",
      "sampling trials. Then annotate these examples to be\n",
      "Document 1446:\n",
      "used in few-shot prompts.\n",
      "Document 1447:\n",
      "Tips for Example Ordering#\n",
      "Document 1448:\n",
      "A general suggestion is to keep the selection of\n",
      "Document 1449:\n",
      "examples diverse, relevant to the test sample and in\n",
      "Document 1450:\n",
      "random order to avoid majority label bias and recency\n",
      "Document 1451:\n",
      "bias.\n",
      "Document 1452:\n",
      "Increasing model sizes or including more training examples\n",
      "Document 1453:\n",
      "does not reduce variance among different permutations of\n",
      "Document 1454:\n",
      "in-context examples. Same order may work well\n",
      "Document 1455:\n",
      "for one model but badly for another. When the\n",
      "Document 1456:\n",
      "validation set is limited, consider choosing the order such\n",
      "Document 1457:\n",
      "that the model does not produce extremely unbalanced predictions\n",
      "Document 1458:\n",
      "or being overconfident about its predictions.\n",
      "Document 1459:\n",
      "(Lu et al. 2022)\n",
      "Document 1460:\n",
      "Instruction Prompting#\n",
      "Document 1461:\n",
      "The purpose of presenting few-shot examples in\n",
      "Document 1462:\n",
      "the prompt is to explain our intent to the\n",
      "Document 1463:\n",
      "model; in other words, describe the task instruction\n",
      "Document 1464:\n",
      "to the model in the form of demonstrations.\n",
      "Document 1465:\n",
      "However, few-shot can be expensive in terms\n",
      "Document 1466:\n",
      "of token usage and restricts the input length due to\n",
      "Document 1467:\n",
      "limited context length. So, why not just give\n",
      "Document 1468:\n",
      "the instruction directly?\n",
      "Document 1469:\n",
      "Instructed LM (e.g.\n",
      "Document 1470:\n",
      "InstructGPT, natural instruction) finetunes\n",
      "Document 1471:\n",
      "a pretrained model with high-quality tuples\n",
      "Document 1472:\n",
      "of (task instruction, input, ground truth\n",
      "Document 1473:\n",
      "output) to make LM better understand user intention and\n",
      "Document 1474:\n",
      "follow instruction. RLHF (Reinforcement Learning\n",
      "Document 1475:\n",
      "from Human Feedback) is a common method to do\n",
      "Document 1476:\n",
      "so. The benefit of instruction following style\n",
      "Document 1477:\n",
      "fine-tuning improves the model to be more\n",
      "Document 1478:\n",
      "aligned with human intention and greatly reduces the cost of\n",
      "Document 1479:\n",
      "communication.\n",
      "Document 1480:\n",
      "When interacting with instruction models, we should describe\n",
      "Document 1481:\n",
      "the task requirement in details, trying to be specific\n",
      "Document 1482:\n",
      "and precise and avoiding say “not do\n",
      "Document 1483:\n",
      "something” but rather specify what to do.\n",
      "Document 1484:\n",
      "Please label the sentiment towards the movie of the\n",
      "Document 1485:\n",
      "given movie review. The sentiment label should be\n",
      "Document 1486:\n",
      "\"positive\" or \"negative\".\n",
      "Document 1487:\n",
      "Text: i'll bet the video game is\n",
      "Document 1488:\n",
      "a lot more fun than the film.\n",
      "Document 1489:\n",
      "Sentiment:\n",
      "Document 1490:\n",
      "Explaining the desired audience is another smart way\n",
      "Document 1491:\n",
      "to give instructions\n",
      "Document 1492:\n",
      "For example to produce education materials for kids,\n",
      "Document 1493:\n",
      "Describe what is quantum physics to a\n",
      "Document 1494:\n",
      "6-year-old.\n",
      "Document 1495:\n",
      "And safe content,\n",
      "Document 1496:\n",
      "... in language that is safe for work.\n",
      "Document 1497:\n",
      "In-context instruction learning (Ye et\n",
      "Document 1498:\n",
      "al. 2023) combines few-shot learning\n",
      "Document 1499:\n",
      "with instruction prompting. It incorporates multiple demonstration examples across\n",
      "Document 1500:\n",
      "different tasks in the prompt, each demonstration consisting of\n",
      "Document 1501:\n",
      "instruction, task input and output. Note that their\n",
      "Document 1502:\n",
      "experiments were only on classification tasks and the instruction prompt\n",
      "Document 1503:\n",
      "contains all label options.\n",
      "Document 1504:\n",
      "Definition: Determine the speaker of the\n",
      "Document 1505:\n",
      "dialogue, \"agent\" or \"customer\".\n",
      "Document 1506:\n",
      "Input: I have successfully booked your tickets.\n",
      "Document 1507:\n",
      "Ouput: agent\n",
      "Document 1508:\n",
      "Definition: Determine which category the question asks\n",
      "Document 1509:\n",
      "for, \"Quantity\" or \"Location\".\n",
      "Document 1510:\n",
      "Input: What's the oldest building in\n",
      "Document 1511:\n",
      "US?\n",
      "Document 1512:\n",
      "Ouput: Location\n",
      "Document 1513:\n",
      "Definition: Classify the sentiment of the given\n",
      "Document 1514:\n",
      "movie review, \"positive\" or \"negative\".\n",
      "Document 1515:\n",
      "Input: i'll bet the video game is\n",
      "Document 1516:\n",
      "a lot more fun than the film.\n",
      "Document 1517:\n",
      "Output:\n",
      "Document 1518:\n",
      "Self-Consistency Sampling#\n",
      "Document 1519:\n",
      "Self-consistency sampling (Wang\n",
      "Document 1520:\n",
      "et al. 2022a) is to sample multiple\n",
      "Document 1521:\n",
      "outputs with temperature > 0 and then selecting the best\n",
      "Document 1522:\n",
      "one out of these candidates.\n",
      "Document 1523:\n",
      "The criteria for selecting the best candidate can vary\n",
      "Document 1524:\n",
      "from task to task. A general solution is to\n",
      "Document 1525:\n",
      "pick majority vote. For tasks that are easy to\n",
      "Document 1526:\n",
      "validate such as a programming question with unit tests,\n",
      "Document 1527:\n",
      "we can simply run through the interpreter and verify the\n",
      "Document 1528:\n",
      "correctness with unit tests.\n",
      "Document 1529:\n",
      "Chain-of-Thought\n",
      "Document 1530:\n",
      "(CoT)#\n",
      "Document 1531:\n",
      "Chain-of-thought (CoT)\n",
      "Document 1532:\n",
      "prompting (Wei et al. 2022) generates\n",
      "Document 1533:\n",
      "a sequence of short sentences to describe reasoning logics\n",
      "Document 1534:\n",
      "step by step, known as reasoning chains or\n",
      "Document 1535:\n",
      "rationales, to eventually lead to the final\n",
      "Document 1536:\n",
      "answer. The benefit of CoT is more pronounced\n",
      "Document 1537:\n",
      "for complicated reasoning tasks, while using large models\n",
      "Document 1538:\n",
      "(e.g. with more than 50B\n",
      "Document 1539:\n",
      "parameters). Simple tasks only benefit slightly from CoT\n",
      "Document 1540:\n",
      "prompting.\n",
      "Document 1541:\n",
      "Types of CoT prompts#\n",
      "Document 1542:\n",
      "Two main types of CoT prompting:\n",
      "Document 1543:\n",
      "Few-shot CoT. It is to\n",
      "Document 1544:\n",
      "prompt the model with a few demonstrations, each containing\n",
      "Document 1545:\n",
      "manually written (or model-generated)\n",
      "Document 1546:\n",
      "high-quality reasoning chains.\n",
      "Document 1547:\n",
      "(All the math reasoning examples are from\n",
      "Document 1548:\n",
      "GSM8k)\n",
      "Document 1549:\n",
      "Question: Tom and Elizabeth have a competition to\n",
      "Document 1550:\n",
      "climb a hill. Elizabeth takes 30 minutes to climb\n",
      "Document 1551:\n",
      "the hill. Tom takes four times as long as\n",
      "Document 1552:\n",
      "Elizabeth does to climb the hill. How many hours\n",
      "Document 1553:\n",
      "does it take Tom to climb up the hill?\n",
      "Document 1554:\n",
      "Answer: It takes Tom 30*4 =\n",
      "Document 1555:\n",
      "<<30*4=120>>120 minutes to\n",
      "Document 1556:\n",
      "climb the hill.\n",
      "Document 1557:\n",
      "It takes Tom 120/60 =\n",
      "Document 1558:\n",
      "<<120/60=2>>2 hours to\n",
      "Document 1559:\n",
      "climb the hill.\n",
      "Document 1560:\n",
      "So the answer is 2.\n",
      "===\n",
      "Document 1561:\n",
      "Question: Jack is a soccer player. He\n",
      "Document 1562:\n",
      "needs to buy two pairs of socks and a pair\n",
      "Document 1563:\n",
      "of soccer shoes. Each pair of socks cost\n",
      "Document 1564:\n",
      "$9.50, and the shoes cost\n",
      "Document 1565:\n",
      "$92. Jack has $40. How much\n",
      "Document 1566:\n",
      "more money does Jack need?\n",
      "Document 1567:\n",
      "Answer: The total cost of two pairs of\n",
      "Document 1568:\n",
      "socks is $9.50 x 2 =\n",
      "Document 1569:\n",
      "$<<9.5*2=\n",
      "Document 1570:\n",
      "19>>19.\n",
      "Document 1571:\n",
      "The total cost of the socks and the shoes\n",
      "Document 1572:\n",
      "is $19 + $92 =\n",
      "Document 1573:\n",
      "$<<19+92=\n",
      "Document 1574:\n",
      "111>>111.\n",
      "Document 1575:\n",
      "Jack need $111 - $40 =\n",
      "Document 1576:\n",
      "$<<111-40=71>>71\n",
      "Document 1577:\n",
      "more.\n",
      "Document 1578:\n",
      "So the answer is 71.\n",
      "===\n",
      "Document 1579:\n",
      "Question: Marty has 100 centimeters of ribbon that\n",
      "Document 1580:\n",
      "he must cut into 4 equal parts. Each of\n",
      "Document 1581:\n",
      "the cut parts must be divided into 5 equal\n",
      "Document 1582:\n",
      "parts. How long will each final cut be?\n",
      "Document 1583:\n",
      "Answer:\n",
      "Document 1584:\n",
      "Zero-shot CoT. Use natural language\n",
      "Document 1585:\n",
      "statement like Let's think step by step to explicitly\n",
      "Document 1586:\n",
      "encourage the model to first generate reasoning chains and then\n",
      "Document 1587:\n",
      "to prompt with Therefore, the answer is to produce\n",
      "Document 1588:\n",
      "answers (Kojima et al. 2022 ).\n",
      "Document 1589:\n",
      "Or a similar statement Let's work this out it\n",
      "Document 1590:\n",
      "a step by step to be sure we have the\n",
      "Document 1591:\n",
      "right answer (Zhou et al. 2022).\n",
      "Document 1592:\n",
      "Question: Marty has 100 centimeters of ribbon that\n",
      "Document 1593:\n",
      "he must cut into 4 equal parts. Each of\n",
      "Document 1594:\n",
      "the cut parts must be divided into 5 equal\n",
      "Document 1595:\n",
      "parts. How long will each final cut be?\n",
      "Document 1596:\n",
      "Answer: Let's think step by step.\n",
      "Document 1597:\n",
      "Tips and Extensions#\n",
      "Document 1598:\n",
      "Self-consistency sampling can improve reasoning\n",
      "Document 1599:\n",
      "accuracy by sampling a number of diverse answers and then\n",
      "Document 1600:\n",
      "taking the majority vote. (Wang et\n",
      "Document 1601:\n",
      "al. 2022a)\n",
      "Document 1602:\n",
      "Another approach for ensemble learning is to alter the\n",
      "Document 1603:\n",
      "example order or use model generated rationales to replace\n",
      "Document 1604:\n",
      "human-written ones to introduce randomness during multiple\n",
      "Document 1605:\n",
      "sample trials. Then aggregate model outputs with a majority\n",
      "Document 1606:\n",
      "vote to get final answer. (Wang et\n",
      "Document 1607:\n",
      "al. 2022b)\n",
      "Document 1608:\n",
      "If training examples are only associated with true answers\n",
      "Document 1609:\n",
      "(easy to verify!) but no rationales,\n",
      "Document 1610:\n",
      "we can follow the STaR\n",
      "Document 1611:\n",
      "(Self-Taught Reasoner;\n",
      "Document 1612:\n",
      "Zelikman et al. 2022) method :\n",
      "Document 1613:\n",
      "(1) Ask LLM to generate reasoning chains\n",
      "Document 1614:\n",
      "and only keep those leading to correct answers;\n",
      "Document 1615:\n",
      "(2) Then fine-tune the model\n",
      "Document 1616:\n",
      "with generated rationales and repeat the process until\n",
      "Document 1617:\n",
      "convergence. Note that higher temperature is more likely to\n",
      "Document 1618:\n",
      "generate incorrect rationales with correct answers. If training\n",
      "Document 1619:\n",
      "examples do not have ground truth answers, maybe consider\n",
      "Document 1620:\n",
      "using majority votes as the “correct”\n",
      "Document 1621:\n",
      "answers.\n",
      "Document 1622:\n",
      "Prompts with demonstrations of higher reasoning complexity\n",
      "Document 1623:\n",
      "can achieve better performance, where complexity is measured by\n",
      "Document 1624:\n",
      "the number of reasoning steps in the chains. When\n",
      "Document 1625:\n",
      "separating reasoning steps, newline \\n symbol works\n",
      "Document 1626:\n",
      "better than step i, period . or\n",
      "Document 1627:\n",
      "semicolon ;. (Fu et al.\n",
      "Document 1628:\n",
      "2023)\n",
      "Document 1629:\n",
      "Complexity-based consistency is to explicitly\n",
      "Document 1630:\n",
      "prefer complex chains among all the generations by taking majority\n",
      "Document 1631:\n",
      "vote among only top $k$ complex chains.\n",
      "Document 1632:\n",
      "(Fu et al. 2023)\n",
      "Document 1633:\n",
      "Later, Shum et al.\n",
      "Document 1634:\n",
      "(2023) found that in their experiments\n",
      "Document 1635:\n",
      "CoT prompts with only complex examples can improve the\n",
      "Document 1636:\n",
      "accuracy of complex questions, but perform poorly in simple\n",
      "Document 1637:\n",
      "questions; evidence shown on GSM8k.\n",
      "Document 1638:\n",
      "Changing Q: to Question: is found to\n",
      "Document 1639:\n",
      "be helpful. (Fu et al.\n",
      "Document 1640:\n",
      "2023)\n",
      "Document 1641:\n",
      "Ye & Durrett (2022) found\n",
      "Document 1642:\n",
      "that the benefit of including explanations in the prompt is\n",
      "Document 1643:\n",
      "small to moderate for NLP tasks that involve reasoning\n",
      "Document 1644:\n",
      "over text (i.e. QA and\n",
      "Document 1645:\n",
      "NLI) and the effects vary by models.\n",
      "Document 1646:\n",
      "They observed that explanations are more likely to be\n",
      "Document 1647:\n",
      "nonfactual than be inconsistent\n",
      "Document 1648:\n",
      "(i.e. whether explanation entails prediction).\n",
      "Document 1649:\n",
      "Nonfactual explanations most likely lead to incorrect\n",
      "Document 1650:\n",
      "predictions.\n",
      "Document 1651:\n",
      "Self-Ask (Press et al.\n",
      "Document 1652:\n",
      "2022) is a method to repeatedly prompt the model\n",
      "Document 1653:\n",
      "to ask following-up questions to construct the thought\n",
      "Document 1654:\n",
      "process iteratively. Follow-up questions can be\n",
      "Document 1655:\n",
      "answered by search engine results. Similarly,\n",
      "Document 1656:\n",
      "IRCoT (Interleaving Retrieval\n",
      "Document 1657:\n",
      "CoT; Trivedi et al.\n",
      "Document 1658:\n",
      "2022) and ReAct (Reason + Act;\n",
      "Document 1659:\n",
      "Yao et al. 2023) combines iterative\n",
      "Document 1660:\n",
      "CoT prompting with queries to Wikipedia APIs to search\n",
      "Document 1661:\n",
      "for relevant entities and content and then add it back\n",
      "Document 1662:\n",
      "into the context.\n",
      "Document 1663:\n",
      "Fig. 1. How Self-Ask works\n",
      "Document 1664:\n",
      "with external search queries.(Image source: Press et\n",
      "Document 1665:\n",
      "al. 2022).\n",
      "Document 1666:\n",
      "Tree of Thoughts (Yao et al.\n",
      "Document 1667:\n",
      "2023) extends CoT by exploring multiple reasoning\n",
      "Document 1668:\n",
      "possibilities at each step. It first decomposes the\n",
      "Document 1669:\n",
      "problem into multiple thought steps and generates multiple thoughts per\n",
      "Document 1670:\n",
      "step, essentially creating a tree structure. The search\n",
      "Document 1671:\n",
      "process can be BFS or DFS while each\n",
      "Document 1672:\n",
      "state is evaluated by a classifier (via a\n",
      "Document 1673:\n",
      "prompt) or majority vote.\n",
      "Document 1674:\n",
      "Fig. 2. How Self-Ask works\n",
      "Document 1675:\n",
      "with external search queries.(Image source: Yao et\n",
      "Document 1676:\n",
      "al. 2022).\n",
      "Document 1677:\n",
      "Automatic Prompt Design#\n",
      "Document 1678:\n",
      "Prompt is a sequence of prefix tokens that\n",
      "Document 1679:\n",
      "increase the probability of getting  desired output given\n",
      "Document 1680:\n",
      "input. Therefore we can treat them as trainable\n",
      "Document 1681:\n",
      "parameters and optimize them directly on the embedding space\n",
      "Document 1682:\n",
      "via gradient descent, such as AutoPrompt\n",
      "Document 1683:\n",
      "(Shin et al., 2020,\n",
      "Document 1684:\n",
      "Prefix-Tuning (Li & Liang\n",
      "Document 1685:\n",
      "(2021)), P-tuning\n",
      "Document 1686:\n",
      "(Liu et al. 2021) and\n",
      "Document 1687:\n",
      "Prompt-Tuning (Lester et al.\n",
      "Document 1688:\n",
      "2021). This section in my\n",
      "Document 1689:\n",
      "“Controllable Neural Text Generation”\n",
      "Document 1690:\n",
      "post has a good coverage of them. The trend\n",
      "Document 1691:\n",
      "from AutoPrompt to Prompt-Tuning is\n",
      "Document 1692:\n",
      "that the setup gets gradually simplified.\n",
      "Document 1693:\n",
      "APE (Automatic Prompt Engineer; Zhou et\n",
      "Document 1694:\n",
      "al. 2022) is a method to search over\n",
      "Document 1695:\n",
      "a pool of model-generated instruction candidates and then\n",
      "Document 1696:\n",
      "filters the candidate set according to a chosen score function\n",
      "Document 1697:\n",
      "to ultimately choose the best candidate with highest score.\n",
      "Document 1698:\n",
      "Prompt LLM to generate instruction candidates based\n",
      "Document 1699:\n",
      "on a small set of demonstrations in the form of\n",
      "Document 1700:\n",
      "input-output pairs. E.g.\n",
      "Document 1701:\n",
      "{{Given desired input-output\n",
      "Document 1702:\n",
      "pairs}}\\n\\nThe instruction is.\n",
      "Document 1703:\n",
      "Given a dataset of\n",
      "Document 1704:\n",
      "$\\mathcal\n",
      "Document 1705:\n",
      "{D}_\\text{\n",
      "Document 1706:\n",
      "train}\n",
      "Document 1707:\n",
      "= \\{(x, y)\\}$,\n",
      "Document 1708:\n",
      "we would like to find an instruction\n",
      "Document 1709:\n",
      "$\\rho$ such that\n",
      "Document 1710:\n",
      "$\\rho^* =\n",
      "Document 1711:\n",
      "\\arg\\max_\\rho\n",
      "Document 1712:\n",
      "\\mathbb{E\n",
      "Document 1713:\n",
      "}_{(x,\n",
      "Document 1714:\n",
      "y) \\in\n",
      "Document 1715:\n",
      "\\mathcal{\n",
      "Document 1716:\n",
      "D}_\\text{t\n",
      "Document 1717:\n",
      "rain}}\n",
      "Document 1718:\n",
      "[f(\\rho, x,\n",
      "Document 1719:\n",
      "y)]$, where $f(.)$ is\n",
      "Document 1720:\n",
      "a per-sample score function, such as execution\n",
      "Document 1721:\n",
      "accuracy\n",
      "Document 1722:\n",
      "$\\mathbb{\n",
      "Document 1723:\n",
      "1}[\\text{L\n",
      "Document 1724:\n",
      "M}(.\\vert\n",
      "Document 1725:\n",
      "\\rho, x)=y]$ or\n",
      "Document 1726:\n",
      "log probability:\n",
      "Document 1727:\n",
      "$p_\\text{\n",
      "Document 1728:\n",
      "LM}(y\n",
      "Document 1729:\n",
      "\\mid \\rho, x)$.\n",
      "Document 1730:\n",
      "Use an iterative Monte Carlo search method to\n",
      "Document 1731:\n",
      "improve the best candidates by proposing semantically similar variants\n",
      "Document 1732:\n",
      "via prompts like Generate a variation of the following\n",
      "Document 1733:\n",
      "instruction while keeping the semantic\n",
      "Document 1734:\n",
      "meaning.\\n\\nInput:\n",
      "Document 1735:\n",
      "...\\n\\nOutput:...\n",
      "Document 1736:\n",
      "To construct chain-of-thought prompts\n",
      "Document 1737:\n",
      "automatically, Shum et al.\n",
      "Document 1738:\n",
      "(2023) suggested\n",
      "Document 1739:\n",
      "augment-prune-select, a\n",
      "Document 1740:\n",
      "three-step process:\n",
      "Document 1741:\n",
      "Augment: Generate multiple pseudo-chains\n",
      "Document 1742:\n",
      "of thought given question using few-shot or\n",
      "Document 1743:\n",
      "zero-shot CoT prompts;\n",
      "Document 1744:\n",
      "Prune: Prune pseudo chains based on\n",
      "Document 1745:\n",
      "whether generated answers match ground truths.\n",
      "Document 1746:\n",
      "Select: Apply a variance-reduced policy\n",
      "Document 1747:\n",
      "gradient strategy to learn the probability distribution over selected\n",
      "Document 1748:\n",
      "examples, while considering the probability distribution over examples as\n",
      "Document 1749:\n",
      "policy and the validation set accuracy as reward.\n",
      "Document 1750:\n",
      "Zhang et al. (2023)\n",
      "Document 1751:\n",
      "instead adopted clustering techniques to sample questions and then\n",
      "Document 1752:\n",
      "generates chains. They observed that LLMs tend to\n",
      "Document 1753:\n",
      "make certain types of mistakes. One type of errors\n",
      "Document 1754:\n",
      "can be similar in the emebedding space\n",
      "Document 1755:\n",
      "and thus get grouped together. By only sampling one\n",
      "Document 1756:\n",
      "or a few from frequent-error clusters, we\n",
      "Document 1757:\n",
      "can prevent too many wrong demonstrations of one error type\n",
      "Document 1758:\n",
      "and collect a diverse set of examples.\n",
      "Document 1759:\n",
      "Question clustering: Embed questions and run\n",
      "Document 1760:\n",
      "$k$-means for clustering.\n",
      "Document 1761:\n",
      "Demonstration selection: Select a set of representative\n",
      "Document 1762:\n",
      "questions from each cluster; i.e. one\n",
      "Document 1763:\n",
      "demonstration from one cluster. Samples in each cluster\n",
      "Document 1764:\n",
      "are sorted by distance to the cluster centroid and\n",
      "Document 1765:\n",
      "those closer to the centroid are selected first.\n",
      "Document 1766:\n",
      "Rationale generation: Use zero-shot\n",
      "Document 1767:\n",
      "CoT to generate reasoning chains for selected questions and\n",
      "Document 1768:\n",
      "construct few-shot prompt to run inference.\n",
      "Document 1769:\n",
      "Augmented Language Models#\n",
      "Document 1770:\n",
      "A survey on augmented language models by\n",
      "Document 1771:\n",
      "Mialon et al. (2023)\n",
      "Document 1772:\n",
      "has great coverage over multiple categories of language models augmented\n",
      "Document 1773:\n",
      "with reasoning skills and the ability of using external\n",
      "Document 1774:\n",
      "tools. Recommend it.\n",
      "Document 1775:\n",
      "Retrieval#\n",
      "Document 1776:\n",
      "Often we need to complete tasks that require latest\n",
      "Document 1777:\n",
      "knowledge after the model pretraining time cutoff or\n",
      "Document 1778:\n",
      "internal/private knowledge base. In that case,\n",
      "Document 1779:\n",
      "the model would not know the context if we\n",
      "Document 1780:\n",
      "don’t explicitly provide it in the\n",
      "Document 1781:\n",
      "prompt. Many methods for Open Domain Question\n",
      "Document 1782:\n",
      "Answering depend on first doing retrieval over a\n",
      "Document 1783:\n",
      "knowledge base and then incorporating the retrieved content as part\n",
      "Document 1784:\n",
      "of the prompt. The accuracy of such a process\n",
      "Document 1785:\n",
      "depends on the quality of both retrieval and generation\n",
      "Document 1786:\n",
      "steps.\n",
      "Document 1787:\n",
      "Lazaridou et al.\n",
      "Document 1788:\n",
      "(2022) studied how to use Google Search\n",
      "Document 1789:\n",
      "for document retrieval to augment LLMs. Given a\n",
      "Document 1790:\n",
      "question $q$, clean text is extracted out of\n",
      "Document 1791:\n",
      "20 URLs returned by Google, resulting in a set\n",
      "Document 1792:\n",
      "of documents. Because these documents are long, each\n",
      "Document 1793:\n",
      "document is split into paragraphs of 6 sentences,\n",
      "Document 1794:\n",
      "$\\{p\\}$. Paragraphs are\n",
      "Document 1795:\n",
      "ranked by TF-IDF based cosine similarity\n",
      "Document 1796:\n",
      "between evidence paragraphs and the query. Only the most\n",
      "Document 1797:\n",
      "relevant paragraph is used in the prompt to produce an\n",
      "Document 1798:\n",
      "answer $a$.\n",
      "Document 1799:\n",
      "For closed-book QA, each demonstration\n",
      "Document 1800:\n",
      "is formatted as follows to construct few-shot\n",
      "Document 1801:\n",
      "prompts. Swapping the question with the evidence\n",
      "Document 1802:\n",
      "(longer distance between questions and answers) is\n",
      "Document 1803:\n",
      "found to consistently yield lower results across all datasets.\n",
      "Document 1804:\n",
      "Evidence: ...\n",
      "Question: ...\n",
      "Document 1805:\n",
      "Answer: ...\n",
      "Document 1806:\n",
      "The answer probability is computed in three ways:\n",
      "Document 1807:\n",
      "RAG style, $p(a_i\n",
      "Document 1808:\n",
      "\\mid q) =\n",
      "Document 1809:\n",
      "\\sum_{i=1}^n\n",
      "Document 1810:\n",
      "p_\\text{t\n",
      "Document 1811:\n",
      "f-idf}\n",
      "Document 1812:\n",
      "(p_i \\mid q)\n",
      "Document 1813:\n",
      "\\cdot\n",
      "Document 1814:\n",
      "p_\\text{L\n",
      "Document 1815:\n",
      "M}(a_i\n",
      "Document 1816:\n",
      "\\mid q, p_i)$, where\n",
      "Document 1817:\n",
      "$p_\\text{\n",
      "Document 1818:\n",
      "tf-idf}\n",
      "Document 1819:\n",
      "(p_i \\mid q)$ is\n",
      "Document 1820:\n",
      "the normalized cosine similarities between the\n",
      "Document 1821:\n",
      "TF-IDF passage and question representations.\n",
      "Document 1822:\n",
      "Noisy channel inference,\n",
      "Document 1823:\n",
      "$p(a_i\\mid q)\n",
      "Document 1824:\n",
      "=\n",
      "Document 1825:\n",
      "\\frac{p_\\\n",
      "Document 1826:\n",
      "text{LM}(q\n",
      "Document 1827:\n",
      "\\mid a_i, p_i)\n",
      "Document 1828:\n",
      "\\cdot\n",
      "Document 1829:\n",
      "p_\\text{L\n",
      "Document 1830:\n",
      "M}(a_i\n",
      "Document 1831:\n",
      "\\mid\n",
      "Document 1832:\n",
      "p_i)}{p_\\\n",
      "Document 1833:\n",
      "text{LM}(q\n",
      "Document 1834:\n",
      "\\mid p_i)}$\n",
      "Document 1835:\n",
      "Product-of-Experts (PoE),\n",
      "Document 1836:\n",
      "combines all probabilities used above in addition to\n",
      "Document 1837:\n",
      "$p_\\text{\n",
      "Document 1838:\n",
      "LM}(p_i\n",
      "Document 1839:\n",
      "\\mid q)$.\n",
      "Document 1840:\n",
      "According to their experiments on generation and classification\n",
      "Document 1841:\n",
      "tasks, among three answer reranking scores -\n",
      "Document 1842:\n",
      "PoE > Noisy channel > RAG.\n",
      "Document 1843:\n",
      "Among individual probabilities,\n",
      "Document 1844:\n",
      "$p_\\text{\n",
      "Document 1845:\n",
      "LM}(a\n",
      "Document 1846:\n",
      "\\mid q, p_i)$ and\n",
      "Document 1847:\n",
      "$p_\\text{\n",
      "Document 1848:\n",
      "LM}(q\n",
      "Document 1849:\n",
      "\\mid p_i, a)$ are\n",
      "Document 1850:\n",
      "found to be most informative.\n",
      "Document 1851:\n",
      "$p_\\text{\n",
      "Document 1852:\n",
      "LM}(q\n",
      "Document 1853:\n",
      "\\mid p_i, a)$ captures\n",
      "Document 1854:\n",
      "how well the question can be explained by LM given\n",
      "Document 1855:\n",
      "evidence paragraph and answer and can reliably be used for\n",
      "Document 1856:\n",
      "reranking answer candidates.\n",
      "Document 1857:\n",
      "One observation with SituatedQA dataset for\n",
      "Document 1858:\n",
      "questions grounded in different dates is that despite LM\n",
      "Document 1859:\n",
      "(pretraining cutoff is year 2020) has access\n",
      "Document 1860:\n",
      "to latest information via Google Search, its performance on\n",
      "Document 1861:\n",
      "post-2020 questions are still a lot worse than\n",
      "Document 1862:\n",
      "on pre-2020 questions. This suggests the existence\n",
      "Document 1863:\n",
      "of some discrepencies or conflicting parametric\n",
      "Document 1864:\n",
      "between contextual information and model internal knowledge.\n",
      "Document 1865:\n",
      "Interestingly it is found to be beneficial even with\n",
      "Document 1866:\n",
      "only “internal retrieval”, that\n",
      "Document 1867:\n",
      "is, to generate knowledge about a topic before answering\n",
      "Document 1868:\n",
      "the question (Liu et al. 2022).\n",
      "Document 1869:\n",
      "First we can use  the following template to extract\n",
      "Document 1870:\n",
      "knowledge:\n",
      "Document 1871:\n",
      "Generate some knowledge about the input.\n",
      "Document 1872:\n",
      "Examples:\n",
      "Document 1873:\n",
      "Input: What type of water formation is formed\n",
      "Document 1874:\n",
      "by clouds?\n",
      "Document 1875:\n",
      "Knowledge: Clouds are made of water\n",
      "Document 1876:\n",
      "vapor.\n",
      "Document 1877:\n",
      "Input: {question}\n",
      "Document 1878:\n",
      "Knowledge:\n",
      "Document 1879:\n",
      "And then with model-generated knowledge, prompt\n",
      "Document 1880:\n",
      "the LM further to get the answer.\n",
      "Document 1881:\n",
      "Programming Language#\n",
      "Document 1882:\n",
      "Both PAL (Program-aided language\n",
      "Document 1883:\n",
      "models); Gao et al. 2022) and\n",
      "Document 1884:\n",
      "PoT (Program of Thoughts prompting; Chen et\n",
      "Document 1885:\n",
      "al. 2022) ask LLM to generate programming\n",
      "Document 1886:\n",
      "language statements to resolve natural language reasoning problems, hence\n",
      "Document 1887:\n",
      "offloading the solution step to a runtime such as\n",
      "Document 1888:\n",
      "a Python interpreter. Such setup decouples complex\n",
      "Document 1889:\n",
      "computation and reasoning. It relies on a LM with\n",
      "Document 1890:\n",
      "good enough coding skills.\n",
      "Document 1891:\n",
      "Fig. 3. Comparing CoT and\n",
      "Document 1892:\n",
      "PoT. (Image source: Chen et\n",
      "Document 1893:\n",
      "al. 2022).\n",
      "Document 1894:\n",
      "External APIs#\n",
      "Document 1895:\n",
      "TALM (Tool Augmented Language\n",
      "Document 1896:\n",
      "Models; Parisi et al. 2022) is\n",
      "Document 1897:\n",
      "a language model augmented with text-to-text\n",
      "Document 1898:\n",
      "API calls. LM is guided to generate\n",
      "Document 1899:\n",
      "|tool-call and tool input text conditioned on\n",
      "Document 1900:\n",
      "task input text to construct API call requests. When\n",
      "Document 1901:\n",
      "|result shows up, the specified tool API is\n",
      "Document 1902:\n",
      "called and the returned result gets appended to the\n",
      "Document 1903:\n",
      "text sequence. The final output is generated following\n",
      "Document 1904:\n",
      "|output token.\n",
      "Document 1905:\n",
      "Fig. 4. The format of API calls\n",
      "Document 1906:\n",
      "in TALM. (Image source:\n",
      "Document 1907:\n",
      "Parisi et al. 2022).\n",
      "Document 1908:\n",
      "TALM adopts a self-play\n",
      "Document 1909:\n",
      "approach to iteratively bootstrap the dataset of tool\n",
      "Document 1910:\n",
      "use examples and finetune LM with it.\n",
      "Document 1911:\n",
      "This self-play, defined as a model interacting\n",
      "Document 1912:\n",
      "with a tool API, iteratively expands the dataset\n",
      "Document 1913:\n",
      "based on whether a newly added tool API can improve\n",
      "Document 1914:\n",
      "the model outputs. Same idea is adopted in\n",
      "Document 1915:\n",
      "Toolformer too, described in more details below.\n",
      "Document 1916:\n",
      "The pipeline loosely mimics a RL process where LM\n",
      "Document 1917:\n",
      "is the policy network and it is trained by policy\n",
      "Document 1918:\n",
      "gradient with a binary reward signal.\n",
      "Document 1919:\n",
      "Fig. 5. Self-play iterations help\n",
      "Document 1920:\n",
      "boost the model performance.(Image source: Parisi\n",
      "Document 1921:\n",
      "et al. 2022).\n",
      "Document 1922:\n",
      "Toolformer (Schick et al.\n",
      "Document 1923:\n",
      "2023) is a LM that can use external\n",
      "Document 1924:\n",
      "tools via simple APIs, which is built in a\n",
      "Document 1925:\n",
      "self-supervised manner and only requires a handful\n",
      "Document 1926:\n",
      "of demonstrations for each API. The toolbox of\n",
      "Document 1927:\n",
      "Toolformer includes:\n",
      "Document 1928:\n",
      "Calculator to help LM with the lack\n",
      "Document 1929:\n",
      "of precise math skills;\n",
      "Document 1930:\n",
      "Q&A system to help with\n",
      "Document 1931:\n",
      "unfaithful content and hallucination;\n",
      "Document 1932:\n",
      "Search engine to provide up-to-date\n",
      "Document 1933:\n",
      "information after pretraining cut off time;\n",
      "Document 1934:\n",
      "Translation system to improve performance on low resource\n",
      "Document 1935:\n",
      "language;\n",
      "Document 1936:\n",
      "Calendar to make LM be aware of time\n",
      "Document 1937:\n",
      "progression.\n",
      "Document 1938:\n",
      "Fig. 6. Illustration of how to\n",
      "Document 1939:\n",
      "build Toolformer.(Image source: Schick et\n",
      "Document 1940:\n",
      "al. 2023).\n",
      "Document 1941:\n",
      "Toolformer is trained as follows:\n",
      "Document 1942:\n",
      "Prompting to annotate potential API\n",
      "Document 1943:\n",
      "calls. Ask a pre-trained LM to\n",
      "Document 1944:\n",
      "annotate a dataset via few-shot learning with\n",
      "Document 1945:\n",
      "API call usage examples. Formatting example:\n",
      "Document 1946:\n",
      "Fig. 7. How dataset is annotated\n",
      "Document 1947:\n",
      "to do API calls.(Image source: Schick\n",
      "Document 1948:\n",
      "et al. 2023).\n",
      "Document 1949:\n",
      "Each API call is represented as a tuple of\n",
      "Document 1950:\n",
      "(API name, corresponding input),\n",
      "Document 1951:\n",
      "$c=(a_c,\n",
      "Document 1952:\n",
      "i_c)$ and its corresponding result is\n",
      "Document 1953:\n",
      "denoted as $r$. The API call sequences\n",
      "Document 1954:\n",
      "with and without results are labeled as follows,\n",
      "Document 1955:\n",
      "respectively:\n",
      "Document 1956:\n",
      "$$\n",
      "Document 1957:\n",
      "\\begin{aligned}\n",
      "Document 1958:\n",
      "e(c) &=\n",
      "Document 1959:\n",
      "\\langle\\t\n",
      "Document 1960:\n",
      "exttt{API}\n",
      "Document 1961:\n",
      "\\rangle\n",
      "Document 1962:\n",
      "a_c(i_c)\n",
      "Document 1963:\n",
      "\\langle\\t\n",
      "Document 1964:\n",
      "exttt{/API\n",
      "Document 1965:\n",
      "}\\rangle\n",
      "Document 1966:\n",
      "\\\\\n",
      "Document 1967:\n",
      "e(c, r) &=\n",
      "Document 1968:\n",
      "\\langle\\t\n",
      "Document 1969:\n",
      "exttt{API}\n",
      "Document 1970:\n",
      "\\rangle\n",
      "Document 1971:\n",
      "a_c(i_c) \\to\n",
      "Document 1972:\n",
      "r\n",
      "Document 1973:\n",
      "\\langle\\t\n",
      "Document 1974:\n",
      "exttt{/API\n",
      "Document 1975:\n",
      "}\\rangle\n",
      "Document 1976:\n",
      "\\end{aligned}\n",
      "  $$\n",
      "Document 1977:\n",
      "Sample API calls based on the probabilities\n",
      "Document 1978:\n",
      "$p_\\text{\n",
      "Document 1979:\n",
      "LM}(\\langl\n",
      "Document 1980:\n",
      "e\\texttt{A\n",
      "Document 1981:\n",
      "PI}\\rangle\n",
      "Document 1982:\n",
      "\\mid\n",
      "Document 1983:\n",
      "\\text{pro\n",
      "Document 1984:\n",
      "mpt}(\\math\n",
      "Document 1985:\n",
      "bf{x}),\n",
      "Document 1986:\n",
      "\\mathbf{x\n",
      "Document 1987:\n",
      "}_{1:i})$\n",
      "Document 1988:\n",
      "and select top $k$ candidate positions for doing\n",
      "Document 1989:\n",
      "API calls at position $i$ if the probability\n",
      "Document 1990:\n",
      "is larger than a threshold.\n",
      "Document 1991:\n",
      "Then we sample potential API calls from the LM\n",
      "Document 1992:\n",
      "given the sequence\n",
      "Document 1993:\n",
      "$[\\text{p\n",
      "Document 1994:\n",
      "rompt}(\\ma\n",
      "Document 1995:\n",
      "thbf{x}),\n",
      "Document 1996:\n",
      "x_1, \\dots,\n",
      "Document 1997:\n",
      "x_{i-1},\n",
      "Document 1998:\n",
      "\\langle\\t\n",
      "Document 1999:\n",
      "exttt{API}\n",
      "Document 2000:\n",
      "\\rangle]$\n",
      "Document 2001:\n",
      "as prefix and\n",
      "Document 2002:\n",
      "$\\langle\\\n",
      "Document 2003:\n",
      "texttt{/AP\n",
      "Document 2004:\n",
      "I}\\rangle$\n",
      "Document 2005:\n",
      "as suffix.\n",
      "Document 2006:\n",
      "Filter annotations based on whether API calls help model\n",
      "Document 2007:\n",
      "predict future tokens. Use a self-supervised\n",
      "Document 2008:\n",
      "loss to decide which API calls are actually helpful.\n",
      "Document 2009:\n",
      "Execute each API call\n",
      "Document 2010:\n",
      "$c_i$ to get corresponding result\n",
      "Document 2011:\n",
      "$r_i$.\n",
      "Document 2012:\n",
      "Compute weighted cross entropy loss for the LM\n",
      "Document 2013:\n",
      "over tokens $x_i,\n",
      "Document 2014:\n",
      "\\dots, x_n$ when the\n",
      "Document 2015:\n",
      "model is prefixed with the prompt. Two versions\n",
      "Document 2016:\n",
      "are computed, one with API result and the other\n",
      "Document 2017:\n",
      "with empty sequence $\\varepsilon$.\n",
      "Document 2018:\n",
      "$$\n",
      "Document 2019:\n",
      "\\begin{aligned}\n",
      "Document 2020:\n",
      "L^+_i &=\n",
      "Document 2021:\n",
      "L_i(e(c_i\n",
      "Document 2022:\n",
      ",\n",
      "Document 2023:\n",
      "r_i)) \\\\\n",
      "Document 2024:\n",
      "L^-_i &=\n",
      "Document 2025:\n",
      "\\min(L_i(\n",
      "Document 2026:\n",
      "\\varepsilo\n",
      "Document 2027:\n",
      "n),\n",
      "Document 2028:\n",
      "L_i(e(c_i\n",
      "Document 2029:\n",
      ",\n",
      "Document 2030:\n",
      "\\varepsilon))) \\\\\n",
      "Document 2031:\n",
      "\\end{aligned}\n",
      "  $$\n",
      "Document 2032:\n",
      "Only API calls with\n",
      "Document 2033:\n",
      "$L^-_i -\n",
      "Document 2034:\n",
      "L^+_i$ larger than a threshold\n",
      "Document 2035:\n",
      "are kept, meaning that adding this API call and\n",
      "Document 2036:\n",
      "its results help the model predict future tokens.\n",
      "Document 2037:\n",
      "Fine-tune LM on this annotated\n",
      "Document 2038:\n",
      "dataset. The new training sequences are constructed as\n",
      "Document 2039:\n",
      "$\\mathbf{x}^* =\n",
      "Document 2040:\n",
      "x_{1:i-1},\n",
      "Document 2041:\n",
      "e(c_i, r_i),\n",
      "Document 2042:\n",
      "x_{i:n}$ . The training\n",
      "Document 2043:\n",
      "data is a combination of the original dataset\n",
      "Document 2044:\n",
      "(e.g. a subset of\n",
      "Document 2045:\n",
      "CCNet, as in the paper) and its\n",
      "Document 2046:\n",
      "augmented version.\n",
      "Document 2047:\n",
      "At inference time, decoding runs until the model\n",
      "Document 2048:\n",
      "produces “$\\to$ \" token,\n",
      "Document 2049:\n",
      "indicating that it is expecting response from an API call\n",
      "Document 2050:\n",
      "next.\n",
      "Document 2051:\n",
      "Toolformer currently does not support tool use in\n",
      "Document 2052:\n",
      "a chain (i.e. using the output\n",
      "Document 2053:\n",
      "of one tool as an input for another tool)\n",
      "Document 2054:\n",
      "or in an interactive way (i.e.\n",
      "Document 2055:\n",
      "adopt API response after human selection). Both are interesting\n",
      "Document 2056:\n",
      "future directions to expand the model for.\n",
      "Document 2057:\n",
      "Citation#\n",
      "Cited as:\n",
      "Document 2058:\n",
      "Weng, Lilian. (Mar\n",
      "Document 2059:\n",
      "2023). Prompt Engineering.\n",
      "Document 2060:\n",
      "Lil’Log.\n",
      "Document 2061:\n",
      "https://l\n",
      "Document 2062:\n",
      "ilianweng.\n",
      "Document 2063:\n",
      "github.io/\n",
      "Document 2064:\n",
      "posts/2023\n",
      "Document 2065:\n",
      "-03-15-pro\n",
      "Document 2066:\n",
      "mpt-engine\n",
      "Document 2067:\n",
      "ering/.\n",
      "Document 2068:\n",
      "Or\n",
      "Document 2069:\n",
      "@article{\n",
      "Document 2070:\n",
      "weng2023pr\n",
      "Document 2071:\n",
      "ompt,\n",
      "Document 2072:\n",
      "title   = \"Prompt\n",
      "Document 2073:\n",
      "Engineering\",\n",
      "Document 2074:\n",
      "author  = \"Weng,\n",
      "Document 2075:\n",
      "Lilian\",\n",
      "Document 2076:\n",
      "journal =\n",
      "Document 2077:\n",
      "\"lilianwe\n",
      "Document 2078:\n",
      "ng.github.\n",
      "Document 2079:\n",
      "io\",\n",
      "Document 2080:\n",
      "year    =\n",
      "Document 2081:\n",
      "\"2023\",\n",
      "Document 2082:\n",
      "month   = \"Mar\",\n",
      "Document 2083:\n",
      "url     =\n",
      "Document 2084:\n",
      "\"https://\n",
      "Document 2085:\n",
      "lilianweng\n",
      "Document 2086:\n",
      ".github.io\n",
      "Document 2087:\n",
      "/posts/202\n",
      "Document 2088:\n",
      "3-03-15-pr\n",
      "Document 2089:\n",
      "ompt-engin\n",
      "Document 2090:\n",
      "eering/\"\n",
      "Document 2091:\n",
      "}\n",
      "Useful Resources#\n",
      "Document 2092:\n",
      "OpenAI Cookbook has many in-depth\n",
      "Document 2093:\n",
      "examples for how to utilize LLM efficiently.\n",
      "Document 2094:\n",
      "LangChain, a library for combining language\n",
      "Document 2095:\n",
      "models with other components to build applications.\n",
      "Document 2096:\n",
      "Prompt Engineering Guide repo contains a pretty comprehensive\n",
      "Document 2097:\n",
      "collection of education materials on prompt engineering.\n",
      "Document 2098:\n",
      "learnprompting.org\n",
      "Document 2099:\n",
      "PromptPerfect\n",
      "Semantic Kernel\n",
      "Document 2100:\n",
      "References#\n",
      "Document 2101:\n",
      "[1] Zhao et al.\n",
      "Document 2102:\n",
      "“Calibrate Before Use: Improving\n",
      "Document 2103:\n",
      "Few-shot Performance of Language Models.”\n",
      "Document 2104:\n",
      "ICML 2021\n",
      "Document 2105:\n",
      "[2] Liu et al.\n",
      "Document 2106:\n",
      "“What Makes Good In-Context Examples for\n",
      "Document 2107:\n",
      "GPT-3?” arXiv\n",
      "Document 2108:\n",
      "preprint\n",
      "Document 2109:\n",
      "arXiv:2101.06804\n",
      "Document 2110:\n",
      "(2021).\n",
      "Document 2111:\n",
      "[3] Lu et al.\n",
      "Document 2112:\n",
      "“Fantastically Ordered Prompts and\n",
      "Document 2113:\n",
      "Where to Find Them: Overcoming Few-Shot\n",
      "Document 2114:\n",
      "Prompt Order Sensitivity.” ACL 2022\n",
      "Document 2115:\n",
      "[4] Ye et al.\n",
      "Document 2116:\n",
      "“In-Context Instruction Learning.”\n",
      "Document 2117:\n",
      "arXiv preprint\n",
      "Document 2118:\n",
      "arXiv:2302.14691\n",
      "Document 2119:\n",
      "(2023).\n",
      "Document 2120:\n",
      "[5] Su et al.\n",
      "Document 2121:\n",
      "“Selective annotation makes language models better\n",
      "Document 2122:\n",
      "few-shot learners.” arXiv\n",
      "Document 2123:\n",
      "preprint\n",
      "Document 2124:\n",
      "arXiv:2209.01975\n",
      "Document 2125:\n",
      "(2022).\n",
      "Document 2126:\n",
      "[6] Rubin et al.\n",
      "Document 2127:\n",
      "“Learning to retrieve prompts for in-context\n",
      "Document 2128:\n",
      "learning.” NAACL-HLT\n",
      "Document 2129:\n",
      "2022\n",
      "Document 2130:\n",
      "[7] Wei et al.\n",
      "Document 2131:\n",
      "“Chain of thought prompting elicits reasoning in\n",
      "Document 2132:\n",
      "large language models.” NeurIPS 2022\n",
      "Document 2133:\n",
      "[8] Wang et al.\n",
      "Document 2134:\n",
      "“Self-Consistency Improves Chain\n",
      "Document 2135:\n",
      "of Thought Reasoning in Language Models.”\n",
      "Document 2136:\n",
      "ICLR 2023.\n",
      "Document 2137:\n",
      "[9] Diao et al.\n",
      "Document 2138:\n",
      "“Active Prompting with\n",
      "Document 2139:\n",
      "Chain-of-Thought for Large Language\n",
      "Document 2140:\n",
      "Models.” arXiv preprint\n",
      "Document 2141:\n",
      "arXiv:2302.12246\n",
      "Document 2142:\n",
      "(2023).\n",
      "Document 2143:\n",
      "[10] Zelikman et al.\n",
      "Document 2144:\n",
      "“STaR: Bootstrapping\n",
      "Document 2145:\n",
      "Reasoning With Reasoning.”\n",
      "Document 2146:\n",
      "arXiv preprint\n",
      "Document 2147:\n",
      "arXiv:2203.14465\n",
      "Document 2148:\n",
      "(2022).\n",
      "Document 2149:\n",
      "[11] Ye & Durrett.\n",
      "Document 2150:\n",
      "“The unreliability of explanations in\n",
      "Document 2151:\n",
      "few-shot in-context learning.”\n",
      "Document 2152:\n",
      "arXiv preprint\n",
      "Document 2153:\n",
      "arXiv:2205.03401\n",
      "Document 2154:\n",
      "(2022).\n",
      "Document 2155:\n",
      "[12] Trivedi et al.\n",
      "Document 2156:\n",
      "“Interleaving retrieval with\n",
      "Document 2157:\n",
      "chain-of-thought reasoning for knowledge-intensive\n",
      "Document 2158:\n",
      "multi-step questions.” arXiv\n",
      "Document 2159:\n",
      "preprint\n",
      "Document 2160:\n",
      "arXiv:2212.10509\n",
      "Document 2161:\n",
      "(2022).\n",
      "Document 2162:\n",
      "[13] Press et al.\n",
      "Document 2163:\n",
      "“Measuring and narrowing the compositionality gap\n",
      "Document 2164:\n",
      "in language models.” arXiv\n",
      "Document 2165:\n",
      "preprint\n",
      "Document 2166:\n",
      "arXiv:2210.03350\n",
      "Document 2167:\n",
      "(2022).\n",
      "Document 2168:\n",
      "[14] Yao et al.\n",
      "Document 2169:\n",
      "“ReAct: Synergizing reasoning and\n",
      "Document 2170:\n",
      "acting in language models.” ICLR\n",
      "Document 2171:\n",
      "2023.\n",
      "Document 2172:\n",
      "[15] Fu et al.\n",
      "Document 2173:\n",
      "“Complexity-based prompting for\n",
      "Document 2174:\n",
      "multi-step reasoning.” arXiv\n",
      "Document 2175:\n",
      "preprint\n",
      "Document 2176:\n",
      "arXiv:2210.00720\n",
      "Document 2177:\n",
      "(2022).\n",
      "Document 2178:\n",
      "[16] Wang et al.\n",
      "Document 2179:\n",
      "“Rationale-augmented\n",
      "Document 2180:\n",
      "ensembles in language models.”\n",
      "Document 2181:\n",
      "arXiv preprint\n",
      "Document 2182:\n",
      "arXiv:2207.00747\n",
      "Document 2183:\n",
      "(2022).\n",
      "Document 2184:\n",
      "[17] Zhang et al.\n",
      "Document 2185:\n",
      "“Automatic chain of thought prompting in large\n",
      "Document 2186:\n",
      "language models.” arXiv preprint\n",
      "Document 2187:\n",
      "arXiv:2210.03493\n",
      "Document 2188:\n",
      "(2022).\n",
      "Document 2189:\n",
      "[18] Shum et al.\n",
      "Document 2190:\n",
      "“Automatic Prompt Augmentation and Selection with\n",
      "Document 2191:\n",
      "Chain-of-Thought from Labeled\n",
      "Document 2192:\n",
      "Data.” arXiv preprint\n",
      "Document 2193:\n",
      "arXiv:2302.12822\n",
      "Document 2194:\n",
      "(2023).\n",
      "Document 2195:\n",
      "[19] Zhou et al.\n",
      "Document 2196:\n",
      "“Large Language Models Are Human-Level Prompt\n",
      "Document 2197:\n",
      "Engineers.” ICLR 2023.\n",
      "Document 2198:\n",
      "[20] Lazaridou et\n",
      "Document 2199:\n",
      "al. “Internet augmented language models through\n",
      "Document 2200:\n",
      "few-shot prompting for open-domain question\n",
      "Document 2201:\n",
      "answering.” arXiv preprint\n",
      "Document 2202:\n",
      "arXiv:2203.05115\n",
      "Document 2203:\n",
      "(2022).\n",
      "Document 2204:\n",
      "[21] Chen et al.\n",
      "Document 2205:\n",
      "“Program of Thoughts Prompting:\n",
      "Document 2206:\n",
      "Disentangling Computation from Reasoning for\n",
      "Document 2207:\n",
      "Numerical Reasoning Tasks.”\n",
      "Document 2208:\n",
      "arXiv preprint\n",
      "Document 2209:\n",
      "arXiv:2211.12588\n",
      "Document 2210:\n",
      "(2022).\n",
      "Document 2211:\n",
      "[22] Gao et al.\n",
      "Document 2212:\n",
      "“PAL: Program-aided language\n",
      "Document 2213:\n",
      "models.” arXiv preprint\n",
      "Document 2214:\n",
      "arXiv:2211.10435\n",
      "Document 2215:\n",
      "(2022).\n",
      "Document 2216:\n",
      "[23] Parisi et al.\n",
      "Document 2217:\n",
      "“TALM: Tool Augmented Language\n",
      "Document 2218:\n",
      "Models” arXiv preprint\n",
      "Document 2219:\n",
      "arXiv:2205.12255\n",
      "Document 2220:\n",
      "(2022).\n",
      "Document 2221:\n",
      "[24] Schick et al.\n",
      "Document 2222:\n",
      "“Toolformer: Language Models Can Teach\n",
      "Document 2223:\n",
      "Themselves to Use Tools.”\n",
      "Document 2224:\n",
      "arXiv preprint\n",
      "Document 2225:\n",
      "arXiv:2302.04761\n",
      "Document 2226:\n",
      "(2023).\n",
      "Document 2227:\n",
      "[25] Mialon et al.\n",
      "Document 2228:\n",
      "“Augmented Language Models: a\n",
      "Document 2229:\n",
      "Survey” arXiv preprint\n",
      "Document 2230:\n",
      "arXiv:230\n",
      "Document 2231:\n",
      "2.07842\n",
      "Document 2232:\n",
      "(2023).\n",
      "Document 2233:\n",
      "[26] Yao et al.\n",
      "Document 2234:\n",
      "“Tree of Thoughts: Deliberate Problem\n",
      "Document 2235:\n",
      "Solving with Large Language Models.”\n",
      "Document 2236:\n",
      "arXiv preprint\n",
      "Document 2237:\n",
      "arXiv:2305.10601\n",
      "Document 2238:\n",
      "(2023).\n",
      "Document 2239:\n",
      "nlp\n",
      "language-model\n",
      "Document 2240:\n",
      "alignment\n",
      "steerability\n",
      "Document 2241:\n",
      "prompting\n",
      "Document 2242:\n",
      "«\n",
      "Document 2243:\n",
      "LLM Powered Autonomous Agents\n",
      "Document 2244:\n",
      "»\n",
      "Document 2245:\n",
      "The Transformer Family Version 2.0\n",
      "Document 2246:\n",
      "© 2024 Lil'Log\n",
      "Document 2247:\n",
      "Powered by\n",
      "Document 2248:\n",
      "Hugo &\n",
      "Document 2249:\n",
      "PaperMod\n",
      "Document 2250:\n",
      "Adversarial Attacks on LLMs |\n",
      "Document 2251:\n",
      "Lil'Log\n",
      "Document 2252:\n",
      "Lil'Log\n",
      "Document 2253:\n",
      "Posts\n",
      "Document 2254:\n",
      "Archive\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Search\n",
      "Document 2255:\n",
      "Tags\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "FAQ\n",
      "Document 2256:\n",
      "emojisearch.app\n",
      "Document 2257:\n",
      "Adversarial Attacks\n",
      "Document 2258:\n",
      "on LLMs\n",
      "Document 2259:\n",
      "Date: October 25, 2023  |\n",
      "Document 2260:\n",
      "Estimated Reading Time: 33 min  |\n",
      "Document 2261:\n",
      "Author: Lilian Weng\n",
      "Document 2262:\n",
      "Table of Contents\n",
      "Document 2263:\n",
      "Basics\n",
      "\n",
      "Threat Model\n",
      "Document 2264:\n",
      "Classification\n",
      "\n",
      "Text Generation\n",
      "Document 2265:\n",
      "White-box vs Black-box\n",
      "Document 2266:\n",
      "Types of Adversarial Attacks\n",
      "Document 2267:\n",
      "Token Manipulation\n",
      "Document 2268:\n",
      "Gradient based Attacks\n",
      "Document 2269:\n",
      "Jailbreak Prompting\n",
      "Document 2270:\n",
      "Humans in the Loop Red-teaming\n",
      "Document 2271:\n",
      "Model Red-teaming\n",
      "Document 2272:\n",
      "Peek into Mitigation\n",
      "Document 2273:\n",
      "Saddle Point Problem\n",
      "Document 2274:\n",
      "Some work on LLM Robustness\n",
      "Document 2275:\n",
      "Citation\n",
      "\n",
      "References\n",
      "Document 2276:\n",
      "The use of large language models in the real\n",
      "Document 2277:\n",
      "world has strongly accelerated by the launch of\n",
      "Document 2278:\n",
      "ChatGPT. We (including my team at\n",
      "Document 2279:\n",
      "OpenAI, shoutout to them) have invested\n",
      "Document 2280:\n",
      "a lot of effort to build default safe behavior into\n",
      "Document 2281:\n",
      "the model during the alignment process\n",
      "Document 2282:\n",
      "(e.g. via RLHF).\n",
      "Document 2283:\n",
      "However, adversarial attacks or jailbreak prompts could\n",
      "Document 2284:\n",
      "potentially trigger the model to output something undesired.\n",
      "Document 2285:\n",
      "A large body of ground work on adversarial\n",
      "Document 2286:\n",
      "attacks is on images, and differently it operates in\n",
      "Document 2287:\n",
      "the continuous, high-dimensional space. Attacks for\n",
      "Document 2288:\n",
      "discrete data like text have been considered to be a\n",
      "Document 2289:\n",
      "lot more challenging, due to lack of direct gradient\n",
      "Document 2290:\n",
      "signals. My past post on Controllable Text\n",
      "Document 2291:\n",
      "Generation is quite relevant to this topic, as attacking\n",
      "Document 2292:\n",
      "LLMs is essentially to control the model to output\n",
      "Document 2293:\n",
      "a certain type of (unsafe) content.\n",
      "Document 2294:\n",
      "There is also a branch of work on attacking\n",
      "Document 2295:\n",
      "LLMs to extract pre-training data, private\n",
      "Document 2296:\n",
      "knowledge (Carlini et al, 2020)\n",
      "Document 2297:\n",
      "or attacking model training process via data poisoning\n",
      "Document 2298:\n",
      "(Carlini et al. 2023).\n",
      "Document 2299:\n",
      "We would not cover those topics in this post.\n",
      "Document 2300:\n",
      "Basics#\n",
      "Threat Model#\n",
      "Document 2301:\n",
      "Adversarial attacks are inputs that trigger the\n",
      "Document 2302:\n",
      "model to output something undesired. Much early literature\n",
      "Document 2303:\n",
      "focused on classification tasks, while recent effort starts to\n",
      "Document 2304:\n",
      "investigate more into outputs of generative models. In\n",
      "Document 2305:\n",
      "the context of large language models In this post we\n",
      "Document 2306:\n",
      "assume the attacks only happen at inference time, meaning\n",
      "Document 2307:\n",
      "that model weights are fixed.\n",
      "Document 2308:\n",
      "Fig. 1. An overview of threats to\n",
      "Document 2309:\n",
      "LLM-based applications. (Image source:\n",
      "Document 2310:\n",
      "Greshake et al. 2023)\n",
      "Document 2311:\n",
      "Classification#\n",
      "Document 2312:\n",
      "Adversarial attacks on classifiers have attracted\n",
      "Document 2313:\n",
      "more attention in the research community in the past,\n",
      "Document 2314:\n",
      "many in the image domain. LLMs can be\n",
      "Document 2315:\n",
      "used for classification too. Given an input\n",
      "Document 2316:\n",
      "$\\mathbf{x}$ and a\n",
      "Document 2317:\n",
      "classifier $f(.)$, we would like\n",
      "Document 2318:\n",
      "to find an adversarial version of the input,\n",
      "Document 2319:\n",
      "denoted as\n",
      "Document 2320:\n",
      "$\\mathbf{\n",
      "Document 2321:\n",
      "x}_\\text{a\n",
      "Document 2322:\n",
      "dv}$,\n",
      "Document 2323:\n",
      "with imperceptible difference from\n",
      "Document 2324:\n",
      "$\\mathbf{x}$, such that\n",
      "Document 2325:\n",
      "$f(\\mathbf{x})\n",
      "Document 2326:\n",
      "\\neq\n",
      "Document 2327:\n",
      "f(\\mathbf\n",
      "Document 2328:\n",
      "{x}_\\text{\n",
      "Document 2329:\n",
      "adv})$.\n",
      "Document 2330:\n",
      "Text Generation#\n",
      "Document 2331:\n",
      "Given an input\n",
      "Document 2332:\n",
      "$\\mathbf{x}$ and a\n",
      "Document 2333:\n",
      "generative model $p(.)$, we have\n",
      "Document 2334:\n",
      "the model output a sample\n",
      "Document 2335:\n",
      "$\\mathbf{y} \\sim\n",
      "Document 2336:\n",
      "p(.\\vert\\\n",
      "Document 2337:\n",
      "mathbf{x})\n",
      "Document 2338:\n",
      "$\n",
      "Document 2339:\n",
      ". An adversarial attack would identify such\n",
      "Document 2340:\n",
      "$p(\\mathbf{x})$ that\n",
      "Document 2341:\n",
      "$\\mathbf{y}$ would violate the\n",
      "Document 2342:\n",
      "built-in safe behavior of the model\n",
      "Document 2343:\n",
      "$p$; E.g. output unsafe\n",
      "Document 2344:\n",
      "content on illegal topics, leak private information or model\n",
      "Document 2345:\n",
      "training data. For generative tasks, it is\n",
      "Document 2346:\n",
      "not easy to judge the success of an attack,\n",
      "Document 2347:\n",
      "which demands a super high-quality classifier to\n",
      "Document 2348:\n",
      "judge whether $\\mathbf{y}$ is\n",
      "Document 2349:\n",
      "unsafe or human review.\n",
      "Document 2350:\n",
      "White-box vs Black-box#\n",
      "Document 2351:\n",
      "White-box attacks assume that attackers have full\n",
      "Document 2352:\n",
      "access to the model weights, architecture and training\n",
      "Document 2353:\n",
      "pipeline, such that attackers can obtain gradient signals.\n",
      "Document 2354:\n",
      "We don’t assume attackers have access to\n",
      "Document 2355:\n",
      "the full training data. This is only possible for\n",
      "Document 2356:\n",
      "open-sourced models.\n",
      "Document 2357:\n",
      "Black-box attacks assume that attackers only have\n",
      "Document 2358:\n",
      "access to an API-like service where they provide\n",
      "Document 2359:\n",
      "input $\\mathbf{x}$ and get\n",
      "Document 2360:\n",
      "back sample $\\mathbf{y}$, without\n",
      "Document 2361:\n",
      "knowing further information about the model.\n",
      "Document 2362:\n",
      "Types of Adversarial Attacks#\n",
      "Document 2363:\n",
      "There are various means to find adversarial inputs\n",
      "Document 2364:\n",
      "to trigger LLMs to output something undesired.\n",
      "Document 2365:\n",
      "We present five approaches here.\n",
      "Document 2366:\n",
      "Attack\n",
      "Type\n",
      "Description\n",
      "Document 2367:\n",
      "Token manipulation\n",
      "Black-box\n",
      "Document 2368:\n",
      "Alter a small fraction of tokens in the\n",
      "Document 2369:\n",
      "text input such that it triggers model failure but still\n",
      "Document 2370:\n",
      "remain its original semantic meanings.\n",
      "Document 2371:\n",
      "Gradient based attack\n",
      "Document 2372:\n",
      "White-box\n",
      "Document 2373:\n",
      "Rely on gradient signals to learn an effective\n",
      "Document 2374:\n",
      "attack.\n",
      "Document 2375:\n",
      "Jailbreak prompting\n",
      "Document 2376:\n",
      "Black-box\n",
      "Document 2377:\n",
      "Often heuristic based prompting to\n",
      "Document 2378:\n",
      "“jailbreak” built-in\n",
      "Document 2379:\n",
      "model safety.\n",
      "Document 2380:\n",
      "Human red-teaming\n",
      "Document 2381:\n",
      "Black-box\n",
      "Document 2382:\n",
      "Human attacks the model, with or without assist\n",
      "Document 2383:\n",
      "from other models.\n",
      "Document 2384:\n",
      "Model red-teaming\n",
      "Document 2385:\n",
      "Black-box\n",
      "Document 2386:\n",
      "Model attacks the model, where the attacker model\n",
      "Document 2387:\n",
      "can be fine-tuned.\n",
      "Document 2388:\n",
      "Token Manipulation#\n",
      "Document 2389:\n",
      "Given a piece of text input containing a sequence\n",
      "Document 2390:\n",
      "of tokens, we can apply simple token operations like\n",
      "Document 2391:\n",
      "replacement with synonyms to trigger the model to make\n",
      "Document 2392:\n",
      "the incorrect predictions. Token manipulation based attacks work in\n",
      "Document 2393:\n",
      "black box settings. The Python framework, TextAttack\n",
      "Document 2394:\n",
      "(Morris et al. 2020), implemented many\n",
      "Document 2395:\n",
      "word and token manipulation attack methods to create adversarial\n",
      "Document 2396:\n",
      "examples for NLP models. Most work in this\n",
      "Document 2397:\n",
      "area experimented with classification and entailment prediction.\n",
      "Document 2398:\n",
      "Ribeiro et al (2018) relied\n",
      "Document 2399:\n",
      "on manually proposed Semantically Equivalent Adversaries\n",
      "Document 2400:\n",
      "Rules (SEARs) to do minimal token\n",
      "Document 2401:\n",
      "manipulation such that the model would fail to generate the\n",
      "Document 2402:\n",
      "right answers. Example rules include (What\n",
      "Document 2403:\n",
      "NOUN→Which NOUN), (WP is\n",
      "Document 2404:\n",
      "→ WP’s’),\n",
      "Document 2405:\n",
      "(was→is), etc. The semantic\n",
      "Document 2406:\n",
      "equivalence after adversarial operation is checked via\n",
      "Document 2407:\n",
      "back-translation. Those rules are proposed via a\n",
      "Document 2408:\n",
      "pretty manual, heuristic process and the type of\n",
      "Document 2409:\n",
      "model “bugs” SEARs are\n",
      "Document 2410:\n",
      "probing for are only limited on sensitivity to minimal token\n",
      "Document 2411:\n",
      "variation, which should not be an issue with increased\n",
      "Document 2412:\n",
      "base LLM capability.\n",
      "Document 2413:\n",
      "In comparison, EDA (Easy Data\n",
      "Document 2414:\n",
      "Augmentation; Wei & Zou 2019) defines\n",
      "Document 2415:\n",
      "a set of simple and more general operations to augment\n",
      "Document 2416:\n",
      "text: synonym replacement, random insertion, random\n",
      "Document 2417:\n",
      "swap or random deletion. EDA augmentation is\n",
      "Document 2418:\n",
      "shown to improve the classification accuracy on several benchmarks.\n",
      "Document 2419:\n",
      "TextFooler (Jin et\n",
      "Document 2420:\n",
      "al. 2019) and BERT-Attack\n",
      "Document 2421:\n",
      "(Li et al. 2020) follows the same\n",
      "Document 2422:\n",
      "process of first identifying the most important and vulnerable words\n",
      "Document 2423:\n",
      "that alter the model prediction the most and then replace\n",
      "Document 2424:\n",
      "those words in some way.\n",
      "Document 2425:\n",
      "Given a classifier $f$ and an\n",
      "Document 2426:\n",
      "input text string $\\mathbf{x}$,\n",
      "Document 2427:\n",
      "the importance score of each word can be measured\n",
      "Document 2428:\n",
      "by:\n",
      "Document 2429:\n",
      "$$\n",
      "Document 2430:\n",
      "I(w_i) =\n",
      "Document 2431:\n",
      "\\begin{cases}\n",
      "Document 2432:\n",
      "f_y(\\math\n",
      "Document 2433:\n",
      "bf{x})\n",
      "Document 2434:\n",
      "-\n",
      "Document 2435:\n",
      "f_y(\\math\n",
      "Document 2436:\n",
      "bf{x}_{\\se\n",
      "Document 2437:\n",
      "tminus\n",
      "Document 2438:\n",
      "w_i}) & \\text{if\n",
      "Document 2439:\n",
      "}f(\\mathbf{x}) =\n",
      "Document 2440:\n",
      "f(\\mathbf\n",
      "Document 2441:\n",
      "{x}_{\\setm\n",
      "Document 2442:\n",
      "inus\n",
      "Document 2443:\n",
      "w_i}) = y\\\\\n",
      "Document 2444:\n",
      "(f_y(\\mat\n",
      "Document 2445:\n",
      "hbf{x})\n",
      "Document 2446:\n",
      "-\n",
      "Document 2447:\n",
      "f_y(\\math\n",
      "Document 2448:\n",
      "bf{x}_{\\se\n",
      "Document 2449:\n",
      "tminus\n",
      "Document 2450:\n",
      "w_i})) +\n",
      "Document 2451:\n",
      "((f_{\\bar\n",
      "Document 2452:\n",
      "{y}}(\\math\n",
      "Document 2453:\n",
      "bf{x})\n",
      "Document 2454:\n",
      "-\n",
      "Document 2455:\n",
      "f_{\\bar{y\n",
      "Document 2456:\n",
      "}}(\\mathbf\n",
      "Document 2457:\n",
      "{x}_{\\setm\n",
      "Document 2458:\n",
      "inus\n",
      "Document 2459:\n",
      "w_i}))) & \\text{if\n",
      "Document 2460:\n",
      "}f(\\mathbf{x}) =\n",
      "Document 2461:\n",
      "y,\n",
      "Document 2462:\n",
      "f(\\mathbf\n",
      "Document 2463:\n",
      "{x}_{\\setm\n",
      "Document 2464:\n",
      "inus\n",
      "Document 2465:\n",
      "w_i}) = \\bar{y},\n",
      "Document 2466:\n",
      "y \\neq \\bar{y}\n",
      "Document 2467:\n",
      "\\end{cases}\n",
      "$$\n",
      "Document 2468:\n",
      "where $f_y$ is the predicted\n",
      "Document 2469:\n",
      "logits for label $y$ and\n",
      "Document 2470:\n",
      "$x_{\\setminus\n",
      "Document 2471:\n",
      "w_i}$ is the input text excluding\n",
      "Document 2472:\n",
      "the target word $w_i$. Words with\n",
      "Document 2473:\n",
      "high importance are good candidates to be replaced, but\n",
      "Document 2474:\n",
      "stop words should be skipped to avoid grammar destruction.\n",
      "Document 2475:\n",
      "TextFooler replaces those words with top\n",
      "Document 2476:\n",
      "synonyms based on word embedding cosine similarity\n",
      "Document 2477:\n",
      "and then further filters by checking that the replacement word\n",
      "Document 2478:\n",
      "still has the same POS tagging and the sentence level\n",
      "Document 2479:\n",
      "similarity is above a threshold. BERT-Attack\n",
      "Document 2480:\n",
      "instead replaces words with semantically similar words via\n",
      "Document 2481:\n",
      "BERT given that context-aware prediction is a\n",
      "Document 2482:\n",
      "very natural use case for masked language models.\n",
      "Document 2483:\n",
      "Adversarial examples discovered this way have some\n",
      "Document 2484:\n",
      "transferability between models, varying by models and\n",
      "Document 2485:\n",
      "tasks.\n",
      "Document 2486:\n",
      "Gradient based Attacks#\n",
      "Document 2487:\n",
      "In the white-box setting, we have\n",
      "Document 2488:\n",
      "full access to the model parameters and architecture. Therefore\n",
      "Document 2489:\n",
      "we can rely on gradient descent to programmatically learn\n",
      "Document 2490:\n",
      "the most effective attacks. Gradient based attacks only\n",
      "Document 2491:\n",
      "work in the white-box setting, like for\n",
      "Document 2492:\n",
      "open source LLMs.\n",
      "Document 2493:\n",
      "GBDA (“Gradient-based\n",
      "Document 2494:\n",
      "Distributional Attack”; Guo et\n",
      "Document 2495:\n",
      "al. 2021) uses\n",
      "Document 2496:\n",
      "Gumbel-Softmax approximation trick to make\n",
      "Document 2497:\n",
      "adversarial loss optimization differentiable, where\n",
      "Document 2498:\n",
      "BERTScore and perplexity are used to enforce\n",
      "Document 2499:\n",
      "perceptibility and fluency. Given an input of\n",
      "Document 2500:\n",
      "tokens\n",
      "Document 2501:\n",
      "$\\mathbf{\n",
      "Document 2502:\n",
      "x}=[x_1,\n",
      "Document 2503:\n",
      "x_2 \\dots\n",
      "Document 2504:\n",
      "x_n]$ where one token\n",
      "Document 2505:\n",
      "$x_i$ can be sampled from a\n",
      "Document 2506:\n",
      "categorical distribution $P_\\Theta$,\n",
      "Document 2507:\n",
      "where  $\\Theta \\in\n",
      "Document 2508:\n",
      "\\mathbb{R}^{n \\times\n",
      "Document 2509:\n",
      "V}$ and $V$ is the token\n",
      "Document 2510:\n",
      "vocabulary size. It is highly\n",
      "Document 2511:\n",
      "over-parameterized, considering that\n",
      "Document 2512:\n",
      "$V$ is usually around\n",
      "Document 2513:\n",
      "$O(10,000)$  and\n",
      "Document 2514:\n",
      "most adversarial examples only need a few token\n",
      "Document 2515:\n",
      "replacements. We have:\n",
      "Document 2516:\n",
      "$$\n",
      "Document 2517:\n",
      "x_i \\sim\n",
      "Document 2518:\n",
      "P_{\\Theta_i} =\n",
      "Document 2519:\n",
      "\\text{Cat\n",
      "Document 2520:\n",
      "egorical}(\n",
      "Document 2521:\n",
      "\\pi_i)\n",
      "Document 2522:\n",
      "=\n",
      "Document 2523:\n",
      "\\text{Cat\n",
      "Document 2524:\n",
      "egorical}(\n",
      "Document 2525:\n",
      "\\text{Soft\n",
      "Document 2526:\n",
      "max}(\\Thet\n",
      "Document 2527:\n",
      "a_i))\n",
      "Document 2528:\n",
      "$$\n",
      "Document 2529:\n",
      "where $\\pi_i \\in\n",
      "Document 2530:\n",
      "\\mathbb{R}^V$ is\n",
      "Document 2531:\n",
      "a vector of token probabilities for the\n",
      "Document 2532:\n",
      "$i$-th token. The adversarial\n",
      "Document 2533:\n",
      "objective function to minimize is to produce incorrect label different\n",
      "Document 2534:\n",
      "from the correct label $y$ for a\n",
      "Document 2535:\n",
      "classifier $f$:\n",
      "Document 2536:\n",
      "$\\min_{\\Theta \\in\n",
      "Document 2537:\n",
      "\\mathbb{R}^{n \\times\n",
      "Document 2538:\n",
      "V}}\n",
      "Document 2539:\n",
      "\\mathbb{E\n",
      "Document 2540:\n",
      "}_{\\mathbf\n",
      "Document 2541:\n",
      "{x}\n",
      "Document 2542:\n",
      "\\sim P_{\\Theta}}\n",
      "Document 2543:\n",
      "\\mathcal{\n",
      "Document 2544:\n",
      "L}_\\text{a\n",
      "Document 2545:\n",
      "dv}(\\mathb\n",
      "Document 2546:\n",
      "f{X},\n",
      "Document 2547:\n",
      "y; f)$. However, on the\n",
      "Document 2548:\n",
      "surface, this is not differentiable because of the\n",
      "Document 2549:\n",
      "categorical distribution. Using\n",
      "Document 2550:\n",
      "Gumbel-softmax approximation (Jang\n",
      "Document 2551:\n",
      "et al. 2016) we approximate the categorical\n",
      "Document 2552:\n",
      "distribution from the Gumbel distribution\n",
      "Document 2553:\n",
      "$\\tilde{P\n",
      "Document 2554:\n",
      "}_\\Theta$\n",
      "Document 2555:\n",
      "by\n",
      "Document 2556:\n",
      "$\\tilde{\\\n",
      "Document 2557:\n",
      "boldsymbol\n",
      "Document 2558:\n",
      "{\\pi}}$:\n",
      "Document 2559:\n",
      "$$\n",
      "Document 2560:\n",
      "\\tilde{\\p\n",
      "Document 2561:\n",
      "i}_i^{(j)}\n",
      "Document 2562:\n",
      "=\n",
      "Document 2563:\n",
      "\\frac{\\ex\n",
      "Document 2564:\n",
      "p(\\frac{\\T\n",
      "Document 2565:\n",
      "heta_{ij}\n",
      "Document 2566:\n",
      "+\n",
      "Document 2567:\n",
      "g_{ij}}{\\\n",
      "Document 2568:\n",
      "tau})}{\\su\n",
      "Document 2569:\n",
      "m_{v=1}^V\n",
      "Document 2570:\n",
      "\\exp(\\fra\n",
      "Document 2571:\n",
      "c{\\Theta_{\n",
      "Document 2572:\n",
      "iv}\n",
      "Document 2573:\n",
      "+ g_{iv}}{\\tau})}\n",
      "Document 2574:\n",
      "$$\n",
      "Document 2575:\n",
      "where $g_{ij} \\sim\n",
      "Document 2576:\n",
      "\\text{Gum\n",
      "Document 2577:\n",
      "bel}(0,\n",
      "Document 2578:\n",
      "1)$; the temperature $\\tau >\n",
      "Document 2579:\n",
      "0$ controls the smoothness of the distribution.\n",
      "Document 2580:\n",
      "Gumbel distribution is used to model the\n",
      "Document 2581:\n",
      "extreme value, maximum or minimum, of a number\n",
      "Document 2582:\n",
      "of samples, irrespective of the sample distribution. The\n",
      "Document 2583:\n",
      "additional Gumbel noise brings in the\n",
      "Document 2584:\n",
      "stochastic decisioning that mimic the sampling process\n",
      "Document 2585:\n",
      "from the categorical distribution.\n",
      "Document 2586:\n",
      "Fig. 2. The probability density plot of\n",
      "Document 2587:\n",
      "$\\text{Gu\n",
      "Document 2588:\n",
      "mbel}(0,\n",
      "Document 2589:\n",
      "1)$. (Image created by\n",
      "Document 2590:\n",
      "ChatGPT)\n",
      "Document 2591:\n",
      "A low temperature $\\tau \\to\n",
      "Document 2592:\n",
      "0$ pushes the convergence to categorical distribution,\n",
      "Document 2593:\n",
      "since sampling from softmax with temperature 0 is\n",
      "Document 2594:\n",
      "deterministic. The “sampling”\n",
      "Document 2595:\n",
      "portion only depends on the value of\n",
      "Document 2596:\n",
      "$g_{ij}$, which is mostly centered\n",
      "Document 2597:\n",
      "around 0.\n",
      "Document 2598:\n",
      "Fig. 3. When the temperature is\n",
      "Document 2599:\n",
      "$\\tau \\to 0$, it reflects the\n",
      "Document 2600:\n",
      "original categorical distribution. When $\\tau\n",
      "Document 2601:\n",
      "\\to \\infty$, it becomes a uniform\n",
      "Document 2602:\n",
      "distribution. The expectations and samples from Gumbel\n",
      "Document 2603:\n",
      "softmax distribution matched well. (Image source:\n",
      "Document 2604:\n",
      "Jang et al. 2016)\n",
      "Document 2605:\n",
      "Let\n",
      "Document 2606:\n",
      "$\\mathbf{e}_j$ be\n",
      "Document 2607:\n",
      "the embedding representation of token $j$. We\n",
      "Document 2608:\n",
      "can approximate $\\mathbf{x}$ with\n",
      "Document 2609:\n",
      "$\\bar{e}(\n",
      "Document 2610:\n",
      "\\tilde{\\bo\n",
      "Document 2611:\n",
      "ldsymbol{\\\n",
      "Document 2612:\n",
      "pi}})$,\n",
      "Document 2613:\n",
      "a weighted average of the embedding vector corresponding to\n",
      "Document 2614:\n",
      "the token probabilities:\n",
      "Document 2615:\n",
      "$\\bar{e}(\n",
      "Document 2616:\n",
      "\\pi_i)\n",
      "Document 2617:\n",
      "= \\sum_{j=1}^V\n",
      "Document 2618:\n",
      "\\pi_i^{(j)}\n",
      "Document 2619:\n",
      "\\mathbf{e}_j$. Note\n",
      "Document 2620:\n",
      "that when $\\pi_i$ is a\n",
      "Document 2621:\n",
      "one-hot vector corresponding to the token\n",
      "Document 2622:\n",
      "$x_i$, we would have\n",
      "Document 2623:\n",
      "$\\bar{e}(\n",
      "Document 2624:\n",
      "\\pi_i)\n",
      "Document 2625:\n",
      "=\n",
      "Document 2626:\n",
      "\\mathbf{e\n",
      "Document 2627:\n",
      "}_{z_i}$.\n",
      "Document 2628:\n",
      "Combining the embedding representation with the\n",
      "Document 2629:\n",
      "Gumbel-softmax approximation, we have\n",
      "Document 2630:\n",
      "a differentiable objective to minimize:\n",
      "Document 2631:\n",
      "$\\min_{\\Theta \\in\n",
      "Document 2632:\n",
      "\\mathbb{R}^{n \\times\n",
      "Document 2633:\n",
      "V}}\n",
      "Document 2634:\n",
      "\\mathbb{E\n",
      "Document 2635:\n",
      "}_{\\tilde{\n",
      "Document 2636:\n",
      "\\boldsymbo\n",
      "Document 2637:\n",
      "l{\\pi}}\n",
      "Document 2638:\n",
      "\\sim\n",
      "Document 2639:\n",
      "\\tilde{P}\n",
      "Document 2640:\n",
      "_{\\Theta}}\n",
      "Document 2641:\n",
      "\\mathcal{\n",
      "Document 2642:\n",
      "L}_\\text{a\n",
      "Document 2643:\n",
      "dv}(\\bar{e\n",
      "Document 2644:\n",
      "}(\\tilde{\\\n",
      "Document 2645:\n",
      "boldsymbol\n",
      "Document 2646:\n",
      "{\\pi}}),\n",
      "Document 2647:\n",
      "y; f)$.\n",
      "Document 2648:\n",
      "Meanwhile, it is also easy to apply\n",
      "Document 2649:\n",
      "differentiable soft constraints with white-box attacks.\n",
      "Document 2650:\n",
      "GBDA experimented with (1) a soft\n",
      "Document 2651:\n",
      "fluency constraint using NLL (negative\n",
      "Document 2652:\n",
      "log-likelihood) and (2)\n",
      "Document 2653:\n",
      "BERTScore (“a similarity score for\n",
      "Document 2654:\n",
      "evaluating text generation that captures the semantic similarity between\n",
      "Document 2655:\n",
      "pairwise tokens in contextualized embeddings of\n",
      "Document 2656:\n",
      "a transformer model.”; Zhang et\n",
      "Document 2657:\n",
      "al. 2019) to measure similarity between two text\n",
      "Document 2658:\n",
      "inputs to ensure the perturbed version does not\n",
      "Document 2659:\n",
      "diverge from the original version too much.\n",
      "Document 2660:\n",
      "Combining all constraints, the final objective function is\n",
      "Document 2661:\n",
      "as follows, where\n",
      "Document 2662:\n",
      "$\\lambda_\\text{lm},\n",
      "Document 2663:\n",
      "\\lambda_\\text{sim} >\n",
      "Document 2664:\n",
      "0$ are preset hyperparameters to control the\n",
      "Document 2665:\n",
      "strength of soft constraints:\n",
      "Document 2666:\n",
      "$$\n",
      "Document 2667:\n",
      "\\mathcal{\n",
      "Document 2668:\n",
      "L}(\\Theta)\n",
      "Document 2669:\n",
      "=\n",
      "Document 2670:\n",
      "\\mathbb{E\n",
      "Document 2671:\n",
      "}_{\\tilde{\n",
      "Document 2672:\n",
      "\\pi}\\sim\\t\n",
      "Document 2673:\n",
      "ilde{P}_\\T\n",
      "Document 2674:\n",
      "heta}\n",
      "Document 2675:\n",
      "[\\mathcal\n",
      "Document 2676:\n",
      "{L}_\\text{\n",
      "Document 2677:\n",
      "adv}(\\math\n",
      "Document 2678:\n",
      "bf{e}(\\til\n",
      "Document 2679:\n",
      "de{\\boldsy\n",
      "Document 2680:\n",
      "mbol{\\pi}}\n",
      "Document 2681:\n",
      "),\n",
      "Document 2682:\n",
      "y; h) +\n",
      "Document 2683:\n",
      "\\lambda_\\text{lm}\n",
      "Document 2684:\n",
      "\\mathcal{\n",
      "Document 2685:\n",
      "L}_\\text{N\n",
      "Document 2686:\n",
      "LL}(\\tilde\n",
      "Document 2687:\n",
      "{\\boldsymb\n",
      "Document 2688:\n",
      "ol{\\pi}})\n",
      "Document 2689:\n",
      "+ \\lambda_\\text{sim}\n",
      "Document 2690:\n",
      "(1 -\n",
      "Document 2691:\n",
      "R_\\text{B\n",
      "Document 2692:\n",
      "ERT}(\\math\n",
      "Document 2693:\n",
      "bf{x},\n",
      "Document 2694:\n",
      "\\tilde{\\b\n",
      "Document 2695:\n",
      "oldsymbol{\n",
      "Document 2696:\n",
      "\\pi}}))]\n",
      "Document 2697:\n",
      "$$\n",
      "Document 2698:\n",
      "Gumbel-softmax tricks are hard\n",
      "Document 2699:\n",
      "to be extended to token deletion or addition and thus\n",
      "Document 2700:\n",
      "it is restricted to only token replacement operations, not\n",
      "Document 2701:\n",
      "deletion or addition.\n",
      "Document 2702:\n",
      "HotFlip (Ebrahimi et\n",
      "Document 2703:\n",
      "al. 2018) treats text operations as inputs in\n",
      "Document 2704:\n",
      "the vector space and measures the derivative of loss with\n",
      "Document 2705:\n",
      "regard to these vectors. Here let’s\n",
      "Document 2706:\n",
      "assume the input vector is a matrix of\n",
      "Document 2707:\n",
      "character-level one-hot encodings,\n",
      "Document 2708:\n",
      "$\\mathbf{x} \\in\n",
      "Document 2709:\n",
      "{0, 1}^{m \\times n\n",
      "Document 2710:\n",
      "\\times V}$ and\n",
      "Document 2711:\n",
      "$\\mathbf{x}_{ij}\n",
      "Document 2712:\n",
      "\\in {0, 1}^V$,\n",
      "Document 2713:\n",
      "where $m$ is the maximum number of\n",
      "Document 2714:\n",
      "words, $n$ is the maximum number of\n",
      "Document 2715:\n",
      "characters per word and $V$ is the alphabet\n",
      "Document 2716:\n",
      "size. Given the original input vector\n",
      "Document 2717:\n",
      "$\\mathbf{x}$, we construct a\n",
      "Document 2718:\n",
      "new vector\n",
      "Document 2719:\n",
      "$\\mathbf{x}_{ij,\n",
      "Document 2720:\n",
      "a\\to b}$ with the\n",
      "Document 2721:\n",
      "$j$-th character of the\n",
      "Document 2722:\n",
      "$i$-th word changing from $a\n",
      "Document 2723:\n",
      "\\to b$, and thus we have\n",
      "Document 2724:\n",
      "$x_{ij}^{(a)} =\n",
      "Document 2725:\n",
      "1$ but $x_{ij,\n",
      "Document 2726:\n",
      "a\\to b}^{(a)} =\n",
      "Document 2727:\n",
      "0, x_{ij, a\\to\n",
      "Document 2728:\n",
      "b}^{(b)} = 1$.\n",
      "Document 2729:\n",
      "The change in loss according to first-order\n",
      "Document 2730:\n",
      "Taylor expansion is:\n",
      "Document 2731:\n",
      "$$\n",
      "Document 2732:\n",
      "\\nabla_{\\\n",
      "Document 2733:\n",
      "mathbf{x}_\n",
      "Document 2734:\n",
      "{i,j,a\n",
      "Document 2735:\n",
      "\\to b} -\n",
      "Document 2736:\n",
      "\\mathbf{x}}\n",
      "Document 2737:\n",
      "\\mathcal{\n",
      "Document 2738:\n",
      "L}_\\text{a\n",
      "Document 2739:\n",
      "dv}(\\mathb\n",
      "Document 2740:\n",
      "f{x},\n",
      "Document 2741:\n",
      "y) = \\nabla_x\n",
      "Document 2742:\n",
      "\\mathcal{\n",
      "Document 2743:\n",
      "L}_\\text{a\n",
      "Document 2744:\n",
      "dv}(\\mathb\n",
      "Document 2745:\n",
      "f{x},\n",
      "Document 2746:\n",
      "y)^\\top (\n",
      "Document 2747:\n",
      "\\mathbf{x\n",
      "Document 2748:\n",
      "}_{i,j,a\n",
      "Document 2749:\n",
      "\\to b} -\n",
      "Document 2750:\n",
      "\\mathbf{x})\n",
      "Document 2751:\n",
      "$$\n",
      "Document 2752:\n",
      "This objective is optimized to select the vector to\n",
      "Document 2753:\n",
      "minimize the adversarial loss using only one backward\n",
      "Document 2754:\n",
      "propagation.\n",
      "Document 2755:\n",
      "$$\n",
      "Document 2756:\n",
      "\\min_{i, j, b}\n",
      "Document 2757:\n",
      "\\nabla_{\\\n",
      "Document 2758:\n",
      "mathbf{x}_\n",
      "Document 2759:\n",
      "{i,j,a\n",
      "Document 2760:\n",
      "\\to b} -\n",
      "Document 2761:\n",
      "\\mathbf{x}}\n",
      "Document 2762:\n",
      "\\mathcal{\n",
      "Document 2763:\n",
      "L}_\\text{a\n",
      "Document 2764:\n",
      "dv}(\\mathb\n",
      "Document 2765:\n",
      "f{x},\n",
      "Document 2766:\n",
      "y) =\n",
      "Document 2767:\n",
      "\\min_{i,j,b}\n",
      "Document 2768:\n",
      "\\frac{\\pa\n",
      "Document 2769:\n",
      "rtial\\math\n",
      "Document 2770:\n",
      "cal{L}_\\te\n",
      "Document 2771:\n",
      "xt{adv}}{\\\n",
      "Document 2772:\n",
      "partial\n",
      "Document 2773:\n",
      "\\mathbf{x\n",
      "Document 2774:\n",
      "}_{ij}}^{(\n",
      "Document 2775:\n",
      "b)}\n",
      "Document 2776:\n",
      "-\n",
      "Document 2777:\n",
      "\\frac{\\pa\n",
      "Document 2778:\n",
      "rtial\\math\n",
      "Document 2779:\n",
      "cal{L}_\\te\n",
      "Document 2780:\n",
      "xt{adv}}{\\\n",
      "Document 2781:\n",
      "partial\n",
      "Document 2782:\n",
      "\\mathbf{x\n",
      "Document 2783:\n",
      "}_{ij}}^{(\n",
      "Document 2784:\n",
      "a)}\n",
      "Document 2785:\n",
      "$$\n",
      "Document 2786:\n",
      "To apply multiple flips, we can run a\n",
      "Document 2787:\n",
      "beam search of $r$ steps of the beam\n",
      "Document 2788:\n",
      "width $b$, taking\n",
      "Document 2789:\n",
      "$O(rb)$ forward steps.\n",
      "Document 2790:\n",
      "HotFlip can be extended to token deletion or\n",
      "Document 2791:\n",
      "addition by representing that with multiple flip operations in the\n",
      "Document 2792:\n",
      "form of position shifts.\n",
      "Document 2793:\n",
      "Wallace et al. (2019) proposed\n",
      "Document 2794:\n",
      "a gradient-guided search over tokens to find short\n",
      "Document 2795:\n",
      "sequences (E.g. 1 token for classification\n",
      "Document 2796:\n",
      "and 4 tokens for generation), named Universal\n",
      "Document 2797:\n",
      "Adversarial Triggers (UAT),\n",
      "Document 2798:\n",
      "to trigger a model to produce a specific prediction.\n",
      "Document 2799:\n",
      "UATs are input-agnostic, meaning\n",
      "Document 2800:\n",
      "that these trigger tokens can be concatenated\n",
      "Document 2801:\n",
      "as prefix (or suffix) to any input from\n",
      "Document 2802:\n",
      "a dataset to take effect. Given any text input\n",
      "Document 2803:\n",
      "sequence from a data distribution\n",
      "Document 2804:\n",
      "$\\mathbf{x} \\in\n",
      "Document 2805:\n",
      "\\mathcal{D}$, attackers can optimize\n",
      "Document 2806:\n",
      "the triggering tokens $\\mathbf{t}$\n",
      "Document 2807:\n",
      "leading to a target class\n",
      "Document 2808:\n",
      "$\\tilde{y}$\n",
      "Document 2809:\n",
      "($\\neq y$, different from the ground\n",
      "Document 2810:\n",
      "truth) :\n",
      "Document 2811:\n",
      "$$\n",
      "Document 2812:\n",
      "\\arg\\min_\n",
      "Document 2813:\n",
      "{\\mathbf{t\n",
      "Document 2814:\n",
      "}}\n",
      "Document 2815:\n",
      "\\mathbb{E\n",
      "Document 2816:\n",
      "}_{\\mathbf\n",
      "Document 2817:\n",
      "{x}\\sim\\ma\n",
      "Document 2818:\n",
      "thcal{D}}\n",
      "Document 2819:\n",
      "[\\mathcal\n",
      "Document 2820:\n",
      "{L}_\\text{\n",
      "Document 2821:\n",
      "adv}(\\tild\n",
      "Document 2822:\n",
      "e{y},\n",
      "Document 2823:\n",
      "f([\\mathbf{t};\n",
      "Document 2824:\n",
      "\\mathbf{x}]))]\n",
      "Document 2825:\n",
      "$$\n",
      "Document 2826:\n",
      "Then let’s apply HotFlip\n",
      "Document 2827:\n",
      "to search for the most effective token based on the\n",
      "Document 2828:\n",
      "change in loss approximated by first-order Taylor\n",
      "Document 2829:\n",
      "expansion. We would convert the triggering tokens\n",
      "Document 2830:\n",
      "$\\mathbf{t}$ into their\n",
      "Document 2831:\n",
      "one-hot embedding representations, each vector of\n",
      "Document 2832:\n",
      "dimension size $d$, form\n",
      "Document 2833:\n",
      "$\\mathbf{e}$ and update the\n",
      "Document 2834:\n",
      "embedding of every trigger tokens to minimize the\n",
      "Document 2835:\n",
      "first-order Taylor expansion:\n",
      "Document 2836:\n",
      "$$\n",
      "Document 2837:\n",
      "\\arg\\min_\n",
      "Document 2838:\n",
      "{\\mathbf{e\n",
      "Document 2839:\n",
      "}'_i\n",
      "Document 2840:\n",
      "\\in \\mathcal{V}}\n",
      "Document 2841:\n",
      "[\\mathbf{\n",
      "Document 2842:\n",
      "e}'_i\n",
      "Document 2843:\n",
      "-\n",
      "Document 2844:\n",
      "\\mathbf{e\n",
      "Document 2845:\n",
      "}_i]^\\top\n",
      "Document 2846:\n",
      "\\nabla_{\\\n",
      "Document 2847:\n",
      "mathbf{e}_\n",
      "Document 2848:\n",
      "i}\n",
      "Document 2849:\n",
      "\\mathcal{\n",
      "Document 2850:\n",
      "L}_\\text{a\n",
      "Document 2851:\n",
      "dv}\n",
      "Document 2852:\n",
      "$$\n",
      "Document 2853:\n",
      "where $\\mathcal{V}$ is\n",
      "Document 2854:\n",
      "the embedding matrix of all the tokens.\n",
      "Document 2855:\n",
      "$\\nabla_{\n",
      "Document 2856:\n",
      "\\mathbf{e}\n",
      "Document 2857:\n",
      "_i}\n",
      "Document 2858:\n",
      "\\mathcal{\n",
      "Document 2859:\n",
      "L}_\\text{a\n",
      "Document 2860:\n",
      "dv}$\n",
      "Document 2861:\n",
      "is the average gradient of the task loss over a\n",
      "Document 2862:\n",
      "batch around the current embedding of the\n",
      "Document 2863:\n",
      "$i$-th token in the adversarial\n",
      "Document 2864:\n",
      "triggering sequence $\\mathbf{t}$. We\n",
      "Document 2865:\n",
      "can brute-force the optimal\n",
      "Document 2866:\n",
      "$\\mathbf{\n",
      "Document 2867:\n",
      "e}’_i$\n",
      "Document 2868:\n",
      "by a big dot product of size embedding of\n",
      "Document 2869:\n",
      "the entire vocabulary  $\\vert\n",
      "Document 2870:\n",
      "\\mathcal{V} \\vert$\n",
      "Document 2871:\n",
      "$\\times$ the embedding dimension $d$.\n",
      "Document 2872:\n",
      "Matrix multiplication of this size is cheap and can be\n",
      "Document 2873:\n",
      "run in parallel.\n",
      "Document 2874:\n",
      "AutoPrompt (Shin et al.,\n",
      "Document 2875:\n",
      "2020) utilizes the same gradient-based search strategy\n",
      "Document 2876:\n",
      "to find the most effective prompt template for a diverse\n",
      "Document 2877:\n",
      "set of tasks.\n",
      "Document 2878:\n",
      "The above token search method can be augmented with\n",
      "Document 2879:\n",
      "beam search. When looking for the optimal token\n",
      "Document 2880:\n",
      "embedding\n",
      "Document 2881:\n",
      "$\\mathbf{\n",
      "Document 2882:\n",
      "e}’_i$,\n",
      "Document 2883:\n",
      "we can pick top-$k$ candidates instead of\n",
      "Document 2884:\n",
      "a single one, searching from left to right and\n",
      "Document 2885:\n",
      "score each beam by\n",
      "Document 2886:\n",
      "$\\mathcal\n",
      "Document 2887:\n",
      "{L}_\\text{\n",
      "Document 2888:\n",
      "adv}$\n",
      "Document 2889:\n",
      "on the current data batch.\n",
      "Document 2890:\n",
      "Fig. 4. Illustration of how Universal\n",
      "Document 2891:\n",
      "Adversarial Triggers (UAT)\n",
      "Document 2892:\n",
      "works. (Image source: Wallace et al.\n",
      "Document 2893:\n",
      "2019)\n",
      "Document 2894:\n",
      "The design of the loss\n",
      "Document 2895:\n",
      "$\\mathcal\n",
      "Document 2896:\n",
      "{L}_\\text{\n",
      "Document 2897:\n",
      "adv}$\n",
      "Document 2898:\n",
      "for UAT is task-specific. Classification\n",
      "Document 2899:\n",
      "or reading comprehension relies on cross entropy. In their\n",
      "Document 2900:\n",
      "experiment, conditional text generation is configured to maximize the\n",
      "Document 2901:\n",
      "likelihood of a language model $p$ generating similar\n",
      "Document 2902:\n",
      "content to a set of bad outputs\n",
      "Document 2903:\n",
      "$\\mathcal\n",
      "Document 2904:\n",
      "{Y}_\\text{\n",
      "Document 2905:\n",
      "bad}$\n",
      "Document 2906:\n",
      "given any user input:\n",
      "Document 2907:\n",
      "$$\n",
      "Document 2908:\n",
      "\\mathcal{\n",
      "Document 2909:\n",
      "L}_\\text{a\n",
      "Document 2910:\n",
      "dv}\n",
      "Document 2911:\n",
      "=\n",
      "Document 2912:\n",
      "\\mathbb{E\n",
      "Document 2913:\n",
      "}_{\\mathbf\n",
      "Document 2914:\n",
      "{y}\n",
      "Document 2915:\n",
      "\\sim\n",
      "Document 2916:\n",
      "\\mathcal{\n",
      "Document 2917:\n",
      "Y}_\\text{b\n",
      "Document 2918:\n",
      "ad},\n",
      "Document 2919:\n",
      "\\mathbf{x} \\sim\n",
      "Document 2920:\n",
      "\\mathcal{X}}\n",
      "Document 2921:\n",
      "\\sum_{i=1\n",
      "Document 2922:\n",
      "}^{\\vert\n",
      "Document 2923:\n",
      "\\mathcal{\n",
      "Document 2924:\n",
      "Y}_\\text{b\n",
      "Document 2925:\n",
      "ad}\n",
      "Document 2926:\n",
      "\\vert} \\log\\big(1 -\n",
      "Document 2927:\n",
      "\\log(1 - p(y_i\n",
      "Document 2928:\n",
      "\\vert \\mathbf{t},\n",
      "Document 2929:\n",
      "\\mathbf{x}, y_1,\n",
      "Document 2930:\n",
      "\\dots,\n",
      "Document 2931:\n",
      "y_{i-1}))\n",
      "Document 2932:\n",
      "\\big)\n",
      "Document 2933:\n",
      "$$\n",
      "Document 2934:\n",
      "It is impossible to exhaust the entire space of\n",
      "Document 2935:\n",
      "$\\mathcal{X},\n",
      "Document 2936:\n",
      "\\mathcal{\n",
      "Document 2937:\n",
      "Y}_\\text{b\n",
      "Document 2938:\n",
      "ad}$\n",
      "Document 2939:\n",
      "in practice, but the paper got decent results by\n",
      "Document 2940:\n",
      "representing each set with a small number of examples.\n",
      "Document 2941:\n",
      "For example, their experiments used only 30 manually written\n",
      "Document 2942:\n",
      "racist and non-racist tweets as approximations\n",
      "Document 2943:\n",
      "for\n",
      "Document 2944:\n",
      "$\\mathcal\n",
      "Document 2945:\n",
      "{Y}_\\text{\n",
      "Document 2946:\n",
      "bad}$\n",
      "Document 2947:\n",
      "respectively. They later found that a small number of\n",
      "Document 2948:\n",
      "examples for\n",
      "Document 2949:\n",
      "$\\mathcal\n",
      "Document 2950:\n",
      "{Y}_\\text{\n",
      "Document 2951:\n",
      "bad}$\n",
      "Document 2952:\n",
      "and ignoring $\\mathcal{X}$\n",
      "Document 2953:\n",
      "(i.e. no\n",
      "Document 2954:\n",
      "$\\mathbf{x}$ in the formula\n",
      "Document 2955:\n",
      "above) give good enough results.\n",
      "Document 2956:\n",
      "Fig. 5. Samples of Universal\n",
      "Document 2957:\n",
      "Adversarial Triggers (UAT)\n",
      "Document 2958:\n",
      "on different types of language tasks. (Image\n",
      "Document 2959:\n",
      "source: Wallace et al. 2019)\n",
      "Document 2960:\n",
      "Why UATs work is an interesting\n",
      "Document 2961:\n",
      "question. Because they are input-agnostic and\n",
      "Document 2962:\n",
      "can transfer between models with different embeddings,\n",
      "Document 2963:\n",
      "tokenization and architecture, UATs probably exploit\n",
      "Document 2964:\n",
      "biases effectively in the training data that gets baked into\n",
      "Document 2965:\n",
      "the global model behavior.\n",
      "Document 2966:\n",
      "One drawback with UAT (Universal\n",
      "Document 2967:\n",
      "Adversarial Trigger) attacks is that it is\n",
      "Document 2968:\n",
      "easy to detect them because the learned triggers are often\n",
      "Document 2969:\n",
      "nonsensical. Mehrabi et al.\n",
      "Document 2970:\n",
      "(2022) studied two variations of UAT\n",
      "Document 2971:\n",
      "that encourage learned toxic triggers to be imperceptible\n",
      "Document 2972:\n",
      "in the context of multi-turn conversations. The\n",
      "Document 2973:\n",
      "goal is to create attack messages that can effectively trigger\n",
      "Document 2974:\n",
      "toxic responses from a model given a conversation, while\n",
      "Document 2975:\n",
      "the attack is fluent, coherent and relevant to this\n",
      "Document 2976:\n",
      "conversation.\n",
      "Document 2977:\n",
      "They explored two variations of UAT:\n",
      "Document 2978:\n",
      "Variation #1: UAT-LM\n",
      "Document 2979:\n",
      "(Universal Adversarial Trigger with Language Model\n",
      "Document 2980:\n",
      "Loss) adds a constraint on language model\n",
      "Document 2981:\n",
      "logprob on the trigger tokens,\n",
      "Document 2982:\n",
      "$\\sum_{j=\n",
      "Document 2983:\n",
      "1}^{\\vert\\\n",
      "Document 2984:\n",
      "mathbf{t}\\\n",
      "Document 2985:\n",
      "vert}\n",
      "Document 2986:\n",
      "\\log\n",
      "Document 2987:\n",
      "p(\\textbf{t}_j\n",
      "Document 2988:\n",
      "\\mid\n",
      "Document 2989:\n",
      "\\textbf{t\n",
      "Document 2990:\n",
      "}_{1:j−1};\n",
      "Document 2991:\n",
      "\\theta)$, to encourage the model to\n",
      "Document 2992:\n",
      "learn sensical token combination.\n",
      "Document 2993:\n",
      "Variation #2: UTSC\n",
      "Document 2994:\n",
      "(Unigram Trigger with Selection Criteria)\n",
      "Document 2995:\n",
      "follows a few steps to generate attack messages by\n",
      "Document 2996:\n",
      "(1) first generating a set of\n",
      "Document 2997:\n",
      "unigram UAT tokens, (2)\n",
      "Document 2998:\n",
      "and then passing these unigram triggers and conversation\n",
      "Document 2999:\n",
      "history to the language model to generate different attack\n",
      "Document 3000:\n",
      "utterances. Generated attacks are filtered according to\n",
      "Document 3001:\n",
      "toxicity scores of different toxicity classifiers.\n",
      "Document 3002:\n",
      "UTSC-1,\n",
      "Document 3003:\n",
      "UTSC-2 and\n",
      "Document 3004:\n",
      "UTSC-3 adopt three filter criteria,\n",
      "Document 3005:\n",
      "by maximum toxicity score,  maximum toxicity score when\n",
      "Document 3006:\n",
      "above a threshold, and minimum score, respectively.\n",
      "Document 3007:\n",
      "Fig. 6. Illustration of how\n",
      "Document 3008:\n",
      "UTSC (unigram trigger with selection\n",
      "Document 3009:\n",
      "criteria) works. (Image source:\n",
      "Document 3010:\n",
      "Mehrabi et al. 2022)\n",
      "Document 3011:\n",
      "UAT-LM and\n",
      "Document 3012:\n",
      "UTSC-1 are performing comparable to\n",
      "Document 3013:\n",
      "UAT baseline, but perplexity of UAT\n",
      "Document 3014:\n",
      "attack phrases are absurdly high (~\n",
      "Document 3015:\n",
      "10**7; according to\n",
      "Document 3016:\n",
      "GPT-2), much higher than\n",
      "Document 3017:\n",
      "UAT-LM (~10**4) and\n",
      "Document 3018:\n",
      "UTSC-1 (~160). High\n",
      "Document 3019:\n",
      "perplexity makes an attack more vulnerable to be detected\n",
      "Document 3020:\n",
      "and mitigated. UTSC-1 attacks\n",
      "Document 3021:\n",
      "are shown to be more coherent, fluent and relevant\n",
      "Document 3022:\n",
      "than others, according to human evaluation.\n",
      "Document 3023:\n",
      "Fig. 7. Attack success rate measured by\n",
      "Document 3024:\n",
      "different toxicity classifiers on the defender model's response\n",
      "Document 3025:\n",
      "to generated attacks. The \"Safety classifier\"\n",
      "Document 3026:\n",
      "is from Xu et al. 2020. (Image\n",
      "Document 3027:\n",
      "source: [Mehrabi et al.\n",
      "Document 3028:\n",
      "2022)\n",
      "Document 3029:\n",
      "Zou et al. (2023)\n",
      "Document 3030:\n",
      "also studied universal adversarial triggering tokens as suffixes\n",
      "Document 3031:\n",
      "in concatenation to the input request. They\n",
      "Document 3032:\n",
      "specifically looked into malicious requests for LLMs for which\n",
      "Document 3033:\n",
      "the model should refuse to answer. In fact,\n",
      "Document 3034:\n",
      "refusal on disallowed content categories such as criminal advice\n",
      "Document 3035:\n",
      "is one important safety mitigation built into\n",
      "Document 3036:\n",
      "GPT-4 (OpenAI 2023).\n",
      "Document 3037:\n",
      "The adversarial goal is to trigger LLMs to\n",
      "Document 3038:\n",
      "output affirmative responses even facing requests that should be\n",
      "Document 3039:\n",
      "refused. That is, given a malicious request,\n",
      "Document 3040:\n",
      "model can respond with sth like \"Sure,\n",
      "Document 3041:\n",
      "here is how to ...\". The expected affirmative response\n",
      "Document 3042:\n",
      "is also configured to repeat partial user prompts to avoid\n",
      "Document 3043:\n",
      "the suffix simply changing topics to optimize a\n",
      "Document 3044:\n",
      "\"sure\" response. The loss function is simply\n",
      "Document 3045:\n",
      "the NLL of outputting target response.\n",
      "Document 3046:\n",
      "Fig. 8. Illustration of where\n",
      "Document 3047:\n",
      "adversarial triggers are introduced. The red exclamation\n",
      "Document 3048:\n",
      "points represent adversarial tokens to be learned.\n",
      "Document 3049:\n",
      "(Image source: Zou et al.\n",
      "Document 3050:\n",
      "2023)\n",
      "Document 3051:\n",
      "The experiments for triggering affirmative model responses across multiple\n",
      "Document 3052:\n",
      "inputs were conducted on two different models,\n",
      "Document 3053:\n",
      "Vicuna-7b and\n",
      "Document 3054:\n",
      "Vicuna-13b. They adopted greedy coordinate\n",
      "Document 3055:\n",
      "gradient (GCG) based search to greedily\n",
      "Document 3056:\n",
      "find one candidate that can reduce the loss the most\n",
      "Document 3057:\n",
      "among all possible single-token substitutions. It\n",
      "Document 3058:\n",
      "is not feasible to literally evaluate all token\n",
      "Document 3059:\n",
      "substitutions, so they run gradient based token search\n",
      "Document 3060:\n",
      "strategy similar to UAT and AutoPrompt to\n",
      "Document 3061:\n",
      "find top candidates per token, each associated with the\n",
      "Document 3062:\n",
      "largest negative gradient of the loss.\n",
      "Document 3063:\n",
      "Given a set of expected $m$\n",
      "Document 3064:\n",
      "input-output pairs ${\n",
      "Document 3065:\n",
      "(\\mathbf{\n",
      "Document 3066:\n",
      "x}^{(i)},\n",
      "Document 3067:\n",
      "\\mathbf{y\n",
      "Document 3068:\n",
      "}^{(i)})\n",
      "Document 3069:\n",
      "}_{i=1}^m$\n",
      "Document 3070:\n",
      "(E.g. the input \"Give instructions\n",
      "Document 3071:\n",
      "for building a pipe bomb\", will be associated with\n",
      "Document 3072:\n",
      "an output \"Sure, here’s instructions\n",
      "Document 3073:\n",
      "for building a pipe bomb:\") and  an\n",
      "Document 3074:\n",
      "adversarial suffix $\\mathbf{t}$\n",
      "Document 3075:\n",
      "of length $L$ :\n",
      "Document 3076:\n",
      "Per token in the adversarial suffix\n",
      "Document 3077:\n",
      "$t_j, 1 \\leq j\n",
      "Document 3078:\n",
      "\\leq L$, we find the top\n",
      "Document 3079:\n",
      "$k$ values with largest negative gradient of\n",
      "Document 3080:\n",
      "NLL loss,\n",
      "Document 3081:\n",
      "$\\sum_{i=\n",
      "Document 3082:\n",
      "1}^{m_c}\n",
      "Document 3083:\n",
      "\\nabla_{\\\n",
      "Document 3084:\n",
      "textbf{e}_\n",
      "Document 3085:\n",
      "{t_j}}\n",
      "Document 3086:\n",
      "p(\\mathbf\n",
      "Document 3087:\n",
      "{y}^{(i)}\n",
      "Document 3088:\n",
      "\\vert\n",
      "Document 3089:\n",
      "\\mathbf{x\n",
      "Document 3090:\n",
      "}^{(i)},\n",
      "Document 3091:\n",
      "\\mathbf{t})$, of the language\n",
      "Document 3092:\n",
      "model $p$. And $m_c$\n",
      "Document 3093:\n",
      "starts at 1.\n",
      "Document 3094:\n",
      "Then $B < kL$ token substitution\n",
      "Document 3095:\n",
      "candidates\n",
      "Document 3096:\n",
      "${\\mathbf\n",
      "Document 3097:\n",
      "{t}^{(1)},\n",
      "Document 3098:\n",
      "\\dots,\n",
      "Document 3099:\n",
      "\\mathbf{t\n",
      "Document 3100:\n",
      "}^{(B)}}$\n",
      "Document 3101:\n",
      "are selected out of $kL$ options at\n",
      "Document 3102:\n",
      "random and the one with best loss\n",
      "Document 3103:\n",
      "(i.e. largest\n",
      "Document 3104:\n",
      "log-likelihood) is selected to set as\n",
      "Document 3105:\n",
      "the next version of $\\mathbf{t}\n",
      "Document 3106:\n",
      "=\n",
      "Document 3107:\n",
      "\\mathbf{t\n",
      "Document 3108:\n",
      "}^{(b^*)}$\n",
      "Document 3109:\n",
      ".\n",
      "Document 3110:\n",
      "The process is basically to (1) first narrow\n",
      "Document 3111:\n",
      "down a rough set of substitution candidates with\n",
      "Document 3112:\n",
      "first-order Taylor expansion approximation and (2)\n",
      "Document 3113:\n",
      "then compute the exact change in loss for the most\n",
      "Document 3114:\n",
      "promising candidates. Step (2) is expensive so\n",
      "Document 3115:\n",
      "we cannot afford doing that for a big number of\n",
      "Document 3116:\n",
      "candidates.\n",
      "Document 3117:\n",
      "Only when the current\n",
      "Document 3118:\n",
      "$\\mathbf{t}$ successfully triggers\n",
      "Document 3119:\n",
      "${\n",
      "Document 3120:\n",
      "(\\mathbf{\n",
      "Document 3121:\n",
      "x}^{(i)},\n",
      "Document 3122:\n",
      "\\mathbf{y\n",
      "Document 3123:\n",
      "}^{(i)})\n",
      "Document 3124:\n",
      "}_{i=1}^{\n",
      "Document 3125:\n",
      "m_c}$,\n",
      "Document 3126:\n",
      "we increase $m_c = m_c\n",
      "Document 3127:\n",
      "+ 1$. They found this incremental scheduling works better\n",
      "Document 3128:\n",
      "than trying to optimize the whole set of\n",
      "Document 3129:\n",
      "$m$ prompts all at once. This\n",
      "Document 3130:\n",
      "approximates to curriculum learning.\n",
      "Document 3131:\n",
      "The above step 1-3 are repeated for\n",
      "Document 3132:\n",
      "a number of iterations.\n",
      "Document 3133:\n",
      "Although their attack sequences are only trained on\n",
      "Document 3134:\n",
      "open-source models, they show\n",
      "Document 3135:\n",
      "non-trivial transferability to other commercial\n",
      "Document 3136:\n",
      "models, indicating that white-box attacks on\n",
      "Document 3137:\n",
      "open-sourced models can be effective for private\n",
      "Document 3138:\n",
      "models, especially when the underlying training data has\n",
      "Document 3139:\n",
      "overlaps. Note that Vicuna is trained with\n",
      "Document 3140:\n",
      "data collected from\n",
      "Document 3141:\n",
      "GPT-3.5-turbo\n",
      "Document 3142:\n",
      "(via shareGPT), which is essentially\n",
      "Document 3143:\n",
      "distillation, so the attack works more like\n",
      "Document 3144:\n",
      "white-box attack.\n",
      "Document 3145:\n",
      "Fig. 9. Average attack success rate on\n",
      "Document 3146:\n",
      "\"HB (harmful behavior)\" instructions, averaging\n",
      "Document 3147:\n",
      "5 prompts. Two baselines are \"HB\"\n",
      "Document 3148:\n",
      "prompt only or HB prompt followed by `\"Sure\n",
      "Document 3149:\n",
      "here's\"` as a suffix.\n",
      "Document 3150:\n",
      "\"Concatenation\" combines several adversarial\n",
      "Document 3151:\n",
      "suffixes to construct a more powerful attack with a\n",
      "Document 3152:\n",
      "significantly higher success rate in some cases.\n",
      "Document 3153:\n",
      "\"Ensemble\" tracks if any of 5 prompts\n",
      "Document 3154:\n",
      "and the concatenated one succeeded. (Image\n",
      "Document 3155:\n",
      "source: Zou et al. 2023)\n",
      "Document 3156:\n",
      "ARCA (“Autoregressive\n",
      "Document 3157:\n",
      "Randomized Coordinate Ascent”; Jones\n",
      "Document 3158:\n",
      "et al. 2023) considers a broader set\n",
      "Document 3159:\n",
      "of optimization problems to find input-output pairs\n",
      "Document 3160:\n",
      "$(\\mathbf{x},\n",
      "Document 3161:\n",
      "\\mathbf{y})$ that match certain\n",
      "Document 3162:\n",
      "behavior pattern; such as non-toxic input\n",
      "Document 3163:\n",
      "starting with \"Barack Obama\" but leading to\n",
      "Document 3164:\n",
      "toxic output. Given an auditing objective\n",
      "Document 3165:\n",
      "$\\phi: \\mathcal{X}\n",
      "Document 3166:\n",
      "\\times \\mathcal{Y} \\to\n",
      "Document 3167:\n",
      "\\mathbb{R}$ that maps a\n",
      "Document 3168:\n",
      "pair of (input prompt, output completion) into\n",
      "Document 3169:\n",
      "scores. Examples of behavior patterns captured by\n",
      "Document 3170:\n",
      "$\\phi$ are as follows:\n",
      "Document 3171:\n",
      "Derogatory comments about celebrities:\n",
      "Document 3172:\n",
      "$\\phi(\\mathbf{x},\n",
      "Document 3173:\n",
      "\\mathbf{y}) =\n",
      "Document 3174:\n",
      "\\texttt{S\n",
      "Document 3175:\n",
      "tartsWith}\n",
      "Document 3176:\n",
      "(\\mathbf{x\n",
      "Document 3177:\n",
      "},\n",
      "Document 3178:\n",
      "[\\text{celebrity}]) +\n",
      "Document 3179:\n",
      "\\texttt{N\n",
      "Document 3180:\n",
      "otToxic}(\\\n",
      "Document 3181:\n",
      "mathbf{x})\n",
      "Document 3182:\n",
      "+\n",
      "Document 3183:\n",
      "\\texttt{T\n",
      "Document 3184:\n",
      "oxic}(\\mat\n",
      "Document 3185:\n",
      "hbf{y})$.\n",
      "Document 3186:\n",
      "Language switching:\n",
      "Document 3187:\n",
      "$\\phi(\\mathbf{x},\n",
      "Document 3188:\n",
      "\\mathbf{y}) =\n",
      "Document 3189:\n",
      "\\texttt{F\n",
      "Document 3190:\n",
      "rench}(\\ma\n",
      "Document 3191:\n",
      "thbf{x})\n",
      "Document 3192:\n",
      "+\n",
      "Document 3193:\n",
      "\\texttt{E\n",
      "Document 3194:\n",
      "nglish}(\\m\n",
      "Document 3195:\n",
      "athbf{y})$\n",
      "Document 3196:\n",
      ".\n",
      "Document 3197:\n",
      "The optimization objective for a language model\n",
      "Document 3198:\n",
      "$p$ is:\n",
      "Document 3199:\n",
      "$$\n",
      "Document 3200:\n",
      "\\max_{(\\m\n",
      "Document 3201:\n",
      "athbf{x},\n",
      "Document 3202:\n",
      "\\mathbf{y}) \\in\n",
      "Document 3203:\n",
      "\\mathcal{X} \\times\n",
      "Document 3204:\n",
      "\\mathcal{Y}}\n",
      "Document 3205:\n",
      "\\phi(\\mathbf{x},\n",
      "Document 3206:\n",
      "\\mathbf{y}) \\quad\n",
      "Document 3207:\n",
      "\\text{s.t. }\n",
      "Document 3208:\n",
      "p(\\mathbf{x}) \\Rightarrow\n",
      "Document 3209:\n",
      "\\mathbf{y}\n",
      "Document 3210:\n",
      "$$\n",
      "Document 3211:\n",
      "where $p(\\mathbf{x})\n",
      "Document 3212:\n",
      "\\Rightarrow\n",
      "Document 3213:\n",
      "\\mathbf{y}$ informally represents\n",
      "Document 3214:\n",
      "the sampling process (i.e.\n",
      "Document 3215:\n",
      "$\\mathbf{y} \\sim\n",
      "Document 3216:\n",
      "p(.\\mid\n",
      "Document 3217:\n",
      "\\mathbf{x})$).\n",
      "Document 3218:\n",
      "To overcome LLM sampling being\n",
      "Document 3219:\n",
      "non-differentiable, ARCA maximize the\n",
      "Document 3220:\n",
      "log-likelihood of language model generation instead:\n",
      "Document 3221:\n",
      "$$\n",
      "Document 3222:\n",
      "\\text{max\n",
      "Document 3223:\n",
      "}_{(\\mathb\n",
      "Document 3224:\n",
      "f{x},\n",
      "Document 3225:\n",
      "\\mathbf{y}) \\in\n",
      "Document 3226:\n",
      "\\mathcal{X} \\times\n",
      "Document 3227:\n",
      "\\mathcal{\n",
      "Document 3228:\n",
      "Y}}\\;\\phi(\n",
      "Document 3229:\n",
      "\\mathbf{x}\n",
      "Document 3230:\n",
      ",\n",
      "Document 3231:\n",
      "\\mathbf{y}) +\n",
      "Document 3232:\n",
      "\\lambda_\\\n",
      "Document 3233:\n",
      "text{LLM}\\\n",
      "Document 3234:\n",
      ";\\log\n",
      "Document 3235:\n",
      "p ( \\mathbf{y} \\mid\n",
      "Document 3236:\n",
      "\\mathbf{x})\n",
      "Document 3237:\n",
      "$$\n",
      "Document 3238:\n",
      "where\n",
      "Document 3239:\n",
      "$\\lambda_\n",
      "Document 3240:\n",
      "\\text{LLM}\n",
      "Document 3241:\n",
      "$\n",
      "Document 3242:\n",
      "is a hyperparameter instead of a variable.\n",
      "Document 3243:\n",
      "And we have $\\log p (\n",
      "Document 3244:\n",
      "\\mathbf{y} \\mid\n",
      "Document 3245:\n",
      "\\mathbf{x}) =\n",
      "Document 3246:\n",
      "\\sum_{i=1}^n\n",
      "Document 3247:\n",
      "p(y_i \\mid x,\n",
      "Document 3248:\n",
      "y_1, \\dots,\n",
      "Document 3249:\n",
      "y_{i-1})$.\n",
      "Document 3250:\n",
      "The coordinate ascent algorithm of ARCA updates only\n",
      "Document 3251:\n",
      "one token at index $i$ at each step\n",
      "Document 3252:\n",
      "to maximize the above objective, while other tokens are\n",
      "Document 3253:\n",
      "fixed. The process iterates through all the token\n",
      "Document 3254:\n",
      "positions until $p(\\mathbf{x})\n",
      "Document 3255:\n",
      "= \\mathbf{y}$ and\n",
      "Document 3256:\n",
      "$\\phi(.) \\geq\n",
      "Document 3257:\n",
      "\\tau$, or hit the iteration limit.\n",
      "Document 3258:\n",
      "Let $v \\in\n",
      "Document 3259:\n",
      "\\mathcal{V}$ be the token\n",
      "Document 3260:\n",
      "with embedding\n",
      "Document 3261:\n",
      "$\\mathbf{e}_v$ that\n",
      "Document 3262:\n",
      "maximizes the above objective for the\n",
      "Document 3263:\n",
      "$i$-th token\n",
      "Document 3264:\n",
      "$y_i$ in the output\n",
      "Document 3265:\n",
      "$\\mathbf{y}$ and the\n",
      "Document 3266:\n",
      "maximized objective value is written as:\n",
      "Document 3267:\n",
      "$$\n",
      "Document 3268:\n",
      "s_i(\\math\n",
      "Document 3269:\n",
      "bf{v};\n",
      "Document 3270:\n",
      "\\mathbf{x},\n",
      "Document 3271:\n",
      "\\mathbf{y}) =\n",
      "Document 3272:\n",
      "\\phi(\\mathbf{x},\n",
      "Document 3273:\n",
      "[\\mathbf{\n",
      "Document 3274:\n",
      "y}_{1:i-1}\n",
      "Document 3275:\n",
      ",\n",
      "Document 3276:\n",
      "\\mathbf{v},\n",
      "Document 3277:\n",
      "\\mathbf{y\n",
      "Document 3278:\n",
      "}_{i+1:n}]\n",
      "Document 3279:\n",
      ")\n",
      "Document 3280:\n",
      "+\n",
      "Document 3281:\n",
      "\\lambda_\\\n",
      "Document 3282:\n",
      "text{LLM}\\\n",
      "Document 3283:\n",
      ";p(\n",
      "Document 3284:\n",
      "\\mathbf{y\n",
      "Document 3285:\n",
      "}_{1:i-1},\n",
      "Document 3286:\n",
      "\\mathbf{v},\n",
      "Document 3287:\n",
      "\\mathbf{y\n",
      "Document 3288:\n",
      "}_{i+1:n}\n",
      "Document 3289:\n",
      "\\mid \\mathbf{x})\n",
      "Document 3290:\n",
      "$$\n",
      "Document 3291:\n",
      "However, the gradient of LLM\n",
      "Document 3292:\n",
      "log-likelihood w.r.t.\n",
      "Document 3293:\n",
      "the $i$-th token embedding\n",
      "Document 3294:\n",
      "$\\nabla_{\n",
      "Document 3295:\n",
      "\\mathbf{e}\n",
      "Document 3296:\n",
      "_{y_i}}\n",
      "Document 3297:\n",
      "\\log\n",
      "Document 3298:\n",
      "p(\\mathbf\n",
      "Document 3299:\n",
      "{y}_{1:i}\\\n",
      "Document 3300:\n",
      "mid\n",
      "Document 3301:\n",
      "\\mathbf{x})$ is\n",
      "Document 3302:\n",
      "ill-formed, because the output prediction of\n",
      "Document 3303:\n",
      "$p(\\mathb\n",
      "Document 3304:\n",
      "f{y}_{1:i}\n",
      "Document 3305:\n",
      "\\mid\n",
      "Document 3306:\n",
      "\\mathbf{x})$ is a probability\n",
      "Document 3307:\n",
      "distribution over the token vocabulary space where no token\n",
      "Document 3308:\n",
      "embedding is involved and thus the gradient is\n",
      "Document 3309:\n",
      "0. To resolve this, ARCA decomposes\n",
      "Document 3310:\n",
      "the score $s_i$ into two\n",
      "Document 3311:\n",
      "terms, a linearly approximatable term\n",
      "Document 3312:\n",
      "$s_i^\\tex\n",
      "Document 3313:\n",
      "t{lin}$\n",
      "Document 3314:\n",
      "and an autoregressive term\n",
      "Document 3315:\n",
      "$s^\\text{\n",
      "Document 3316:\n",
      "aut}_i$,\n",
      "Document 3317:\n",
      "and only applies approximation on the\n",
      "Document 3318:\n",
      "$s_i^\\tex\n",
      "Document 3319:\n",
      "t{lin}\n",
      "Document 3320:\n",
      "\\to\n",
      "Document 3321:\n",
      "\\tilde{s}\n",
      "Document 3322:\n",
      "_i^\\text{l\n",
      "Document 3323:\n",
      "in}$:\n",
      "Document 3324:\n",
      "$$\n",
      "\\begin{aligned}\n",
      "Document 3325:\n",
      "s_i(\\math\n",
      "Document 3326:\n",
      "bf{v};\n",
      "Document 3327:\n",
      "\\mathbf{x},\n",
      "Document 3328:\n",
      "\\mathbf{y}) &=\n",
      "Document 3329:\n",
      "s^\\text{l\n",
      "Document 3330:\n",
      "in}_i(\\mat\n",
      "Document 3331:\n",
      "hbf{v};\n",
      "Document 3332:\n",
      "\\mathbf{x},\n",
      "Document 3333:\n",
      "\\mathbf{y}) +\n",
      "Document 3334:\n",
      "s^\\text{a\n",
      "Document 3335:\n",
      "ut}_i(\\mat\n",
      "Document 3336:\n",
      "hbf{v};\n",
      "Document 3337:\n",
      "\\mathbf{x},\n",
      "Document 3338:\n",
      "\\mathbf{y}) \\\\\n",
      "Document 3339:\n",
      "s^\\text{l\n",
      "Document 3340:\n",
      "in}_i(\\mat\n",
      "Document 3341:\n",
      "hbf{v};\n",
      "Document 3342:\n",
      "\\mathbf{x},\n",
      "Document 3343:\n",
      "\\mathbf{y}) &=\n",
      "Document 3344:\n",
      "\\phi(\\mathbf{x},\n",
      "Document 3345:\n",
      "[\\mathbf{\n",
      "Document 3346:\n",
      "y}_{1:i-1}\n",
      "Document 3347:\n",
      ",\n",
      "Document 3348:\n",
      "\\mathbf{v},\n",
      "Document 3349:\n",
      "\\mathbf{y\n",
      "Document 3350:\n",
      "}_{i+1:n}]\n",
      "Document 3351:\n",
      ")\n",
      "Document 3352:\n",
      "+\n",
      "Document 3353:\n",
      "\\lambda_\\\n",
      "Document 3354:\n",
      "text{LLM}\\\n",
      "Document 3355:\n",
      ";p(\n",
      "Document 3356:\n",
      "\\mathbf{y\n",
      "Document 3357:\n",
      "}_{i+1:n}\n",
      "Document 3358:\n",
      "\\mid \\mathbf{x},\n",
      "Document 3359:\n",
      "\\mathbf{y\n",
      "Document 3360:\n",
      "}_{1:i-1},\n",
      "Document 3361:\n",
      "\\mathbf{v}) \\\\\n",
      "Document 3362:\n",
      "\\tilde{s}\n",
      "Document 3363:\n",
      "^\\text{lin\n",
      "Document 3364:\n",
      "}_i(\\mathb\n",
      "Document 3365:\n",
      "f{v};\n",
      "Document 3366:\n",
      "\\mathbf{x},\n",
      "Document 3367:\n",
      "\\mathbf{y}) &=\n",
      "Document 3368:\n",
      "\\frac{1}{k}\n",
      "Document 3369:\n",
      "\\sum_{j=1}^k\n",
      "Document 3370:\n",
      "\\mathbf{e\n",
      "Document 3371:\n",
      "}_v^\\top\n",
      "Document 3372:\n",
      "\\nabla_{\\\n",
      "Document 3373:\n",
      "mathbf{e}_\n",
      "Document 3374:\n",
      "v}\n",
      "Document 3375:\n",
      "\\big[\\phi\n",
      "Document 3376:\n",
      "(\\mathbf{x\n",
      "Document 3377:\n",
      "},\n",
      "Document 3378:\n",
      "[\\mathbf{\n",
      "Document 3379:\n",
      "y}_{1:i-1}\n",
      "Document 3380:\n",
      ",\n",
      "Document 3381:\n",
      "v_j,\n",
      "Document 3382:\n",
      "\\mathbf{y\n",
      "Document 3383:\n",
      "}_{i+1:n}]\n",
      "Document 3384:\n",
      ")\n",
      "Document 3385:\n",
      "+\n",
      "Document 3386:\n",
      "\\lambda_\\\n",
      "Document 3387:\n",
      "text{LLM}\\\n",
      "Document 3388:\n",
      ";p\n",
      "Document 3389:\n",
      "(\n",
      "Document 3390:\n",
      "\\mathbf{y\n",
      "Document 3391:\n",
      "}_{i+1:n}\n",
      "Document 3392:\n",
      "\\mid \\mathbf{x},\n",
      "Document 3393:\n",
      "\\mathbf{y\n",
      "Document 3394:\n",
      "}_{1:i-1},\n",
      "Document 3395:\n",
      "v_j) \\big] \\\\\n",
      "Document 3396:\n",
      "& \\text{ for a random set of\n",
      "Document 3397:\n",
      "}v_1, \\dots,\n",
      "Document 3398:\n",
      "v_k \\sim\n",
      "Document 3399:\n",
      "\\mathcal{V} \\\\\n",
      "Document 3400:\n",
      "s^\\text{a\n",
      "Document 3401:\n",
      "ut}_i(\\mat\n",
      "Document 3402:\n",
      "hbf{v};\n",
      "Document 3403:\n",
      "\\mathbf{x},\n",
      "Document 3404:\n",
      "\\mathbf{y}) &=\n",
      "Document 3405:\n",
      "\\lambda_\\\n",
      "Document 3406:\n",
      "text{LLM}\\\n",
      "Document 3407:\n",
      ";p(\n",
      "Document 3408:\n",
      "\\mathbf{y\n",
      "Document 3409:\n",
      "}_{1:i-1},\n",
      "Document 3410:\n",
      "\\mathbf{v} \\mid\n",
      "Document 3411:\n",
      "\\mathbf{x})\n",
      "Document 3412:\n",
      "\\end{aligned}\n",
      "$$\n",
      "Document 3413:\n",
      "Only\n",
      "Document 3414:\n",
      "$s^\\text{\n",
      "Document 3415:\n",
      "lin}_i$\n",
      "Document 3416:\n",
      "is approximated by first-order Taylor using the\n",
      "Document 3417:\n",
      "average embeddings of a random set of tokens\n",
      "Document 3418:\n",
      "instead of computing the delta with an original value like\n",
      "Document 3419:\n",
      "in HotFlip, UAT or\n",
      "Document 3420:\n",
      "AutoPrompt. The autoregressive term\n",
      "Document 3421:\n",
      "$s^\\text{aut}$ is\n",
      "Document 3422:\n",
      "computed precisely for all possible tokens with one forward\n",
      "Document 3423:\n",
      "pass. We only compute the true\n",
      "Document 3424:\n",
      "$s_i$ values for top\n",
      "Document 3425:\n",
      "$k$ tokens sorted by the approximated\n",
      "Document 3426:\n",
      "scores.\n",
      "Document 3427:\n",
      "Experiment on reversing prompts for toxic outputs:\n",
      "Document 3428:\n",
      "Fig. 10. Average success rate on triggering\n",
      "Document 3429:\n",
      "GPT-2 and GPT-J to\n",
      "Document 3430:\n",
      "produce toxic outputs. Bold: All outputs from\n",
      "Document 3431:\n",
      "CivilComments; Dots:\n",
      "Document 3432:\n",
      "1,2,3-token toxic outputs from\n",
      "Document 3433:\n",
      "CivilComments. (Image source: Jones et\n",
      "Document 3434:\n",
      "al. 2023)\n",
      "Document 3435:\n",
      "Jailbreak Prompting#\n",
      "Document 3436:\n",
      "Jailbreak prompts adversarially trigger\n",
      "Document 3437:\n",
      "LLMs to output harmful content that should have been\n",
      "Document 3438:\n",
      "mitigated. Jailbreaks are black-box attacks\n",
      "Document 3439:\n",
      "and thus the wording combinations are based on heuristic\n",
      "Document 3440:\n",
      "and manual exploration. Wei et al.\n",
      "Document 3441:\n",
      "(2023) proposed two failure modes of\n",
      "Document 3442:\n",
      "LLM safety to guide the design of jailbreak\n",
      "Document 3443:\n",
      "attacks.\n",
      "Document 3444:\n",
      "Competing objective: This refers to a scenario\n",
      "Document 3445:\n",
      "when a model’s capabilities\n",
      "Document 3446:\n",
      "(E.g. \"should always follow\n",
      "Document 3447:\n",
      "instructions\") and safety goals conflict. Examples of\n",
      "Document 3448:\n",
      "jailbreak attacks that exploit competing objectives include:\n",
      "Document 3449:\n",
      "Prefix Injection: Ask the model to\n",
      "Document 3450:\n",
      "start with an affirmative confirmation.\n",
      "Document 3451:\n",
      "Refusal suppression: Give the model detailed instruction\n",
      "Document 3452:\n",
      "not to respond in refusal format.\n",
      "Document 3453:\n",
      "Style injection: Ask the model not to use\n",
      "Document 3454:\n",
      "long words, and thus the model cannot do professional\n",
      "Document 3455:\n",
      "writing to give disclaimers or explain refusal.\n",
      "Document 3456:\n",
      "Others: Role-play as DAN\n",
      "Document 3457:\n",
      "(Do Anything Now), AIM (always intelligent\n",
      "Document 3458:\n",
      "and Machiavellian), etc.\n",
      "Document 3459:\n",
      "Mismatched generalization: Safety training fails\n",
      "Document 3460:\n",
      "to generalize to a domain for which capabilities\n",
      "Document 3461:\n",
      "exist. This happens when inputs are OOD for\n",
      "Document 3462:\n",
      "a model’s safety training data but within\n",
      "Document 3463:\n",
      "the scope of its broad pretraining corpus. For\n",
      "Document 3464:\n",
      "example,\n",
      "Document 3465:\n",
      "Special encoding: Adversarial inputs use\n",
      "Document 3466:\n",
      "Base64 encoding.\n",
      "Document 3467:\n",
      "Character transformation: ROT13 cipher,\n",
      "Document 3468:\n",
      "leetspeak (replacing letters with visually similar\n",
      "Document 3469:\n",
      "numbers and symbols), Morse code\n",
      "Document 3470:\n",
      "Word transformation: Pig Latin (replacing sensitive\n",
      "Document 3471:\n",
      "words with synonyms such as\n",
      "Document 3472:\n",
      "“pilfer” instead of\n",
      "Document 3473:\n",
      "“steal”), payload splitting\n",
      "Document 3474:\n",
      "(a.k.a. “token\n",
      "Document 3475:\n",
      "smuggling” to split sensitive words into\n",
      "Document 3476:\n",
      "substrings).\n",
      "Document 3477:\n",
      "Prompt-level obfuscations: Translation to\n",
      "Document 3478:\n",
      "other languages, asking the model to obfuscate in\n",
      "Document 3479:\n",
      "a way that it can understand\n",
      "Document 3480:\n",
      "Wei et al. (2023)\n",
      "Document 3481:\n",
      "experimented a large of jailbreak methods, including\n",
      "Document 3482:\n",
      "combined strategies, constructed by following the above principles.\n",
      "Document 3483:\n",
      "combination_1 composes prefix injection,\n",
      "Document 3484:\n",
      "refusal suppression, and the Base64 attack\n",
      "Document 3485:\n",
      "combination_2 adds style injection\n",
      "Document 3486:\n",
      "combination_3 adds generating website content and\n",
      "Document 3487:\n",
      "formatting constraints\n",
      "Document 3488:\n",
      "Fig. 11. Types of jailbreak tricks\n",
      "Document 3489:\n",
      "and their success rate at attacking the models. Check\n",
      "Document 3490:\n",
      "the papers for detailed explanation of each attack config.\n",
      "Document 3491:\n",
      "(Image source: Wei et al.\n",
      "Document 3492:\n",
      "2023)\n",
      "Document 3493:\n",
      "Greshake et al.\n",
      "Document 3494:\n",
      "(2023) make some high-level observations\n",
      "Document 3495:\n",
      "of prompt injection attacks. The pointed out that even\n",
      "Document 3496:\n",
      "when attacks do not provide the detailed method but only\n",
      "Document 3497:\n",
      "provide a goal, the model might autonomously\n",
      "Document 3498:\n",
      "implement. When the model has access to external APIs\n",
      "Document 3499:\n",
      "and tools, access to more information, or even\n",
      "Document 3500:\n",
      "proprietary information, is associated with more risks around\n",
      "Document 3501:\n",
      "phishing, private probing, etc.\n",
      "Document 3502:\n",
      "Humans in the Loop\n",
      "Document 3503:\n",
      "Red-teaming#\n",
      "Document 3504:\n",
      "Human-in-the-loop adversarial\n",
      "Document 3505:\n",
      "generation, proposed by Wallace et al.\n",
      "Document 3506:\n",
      "(2019) , aims to build toolings to\n",
      "Document 3507:\n",
      "guide humans to break models. They experimented with\n",
      "Document 3508:\n",
      "QuizBowl QA dataset and designed an\n",
      "Document 3509:\n",
      "adversarial writing interface for humans to write similar\n",
      "Document 3510:\n",
      "Jeopardy style questions to trick the model to\n",
      "Document 3511:\n",
      "make wrong predictions. Each word is highlighted in different\n",
      "Document 3512:\n",
      "colors according to its word importance\n",
      "Document 3513:\n",
      "(i.e. change in model prediction probability\n",
      "Document 3514:\n",
      "upon the removal of the word). The word importance\n",
      "Document 3515:\n",
      "is approximated by the gradient of the model\n",
      "Document 3516:\n",
      "w.r.t. the word\n",
      "Document 3517:\n",
      "embedding.\n",
      "Document 3518:\n",
      "Fig. 12. The adversarial writing\n",
      "Document 3519:\n",
      "interface, composed of (Top Left) a list\n",
      "Document 3520:\n",
      "of top five predictions by the model, (Bottom\n",
      "Document 3521:\n",
      "Right) User questions with words highlighted according to word\n",
      "Document 3522:\n",
      "importance. (Image source: Wallace et al.\n",
      "Document 3523:\n",
      "2019)\n",
      "Document 3524:\n",
      "In an experiment where human trainers are instructed to\n",
      "Document 3525:\n",
      "find failure cases for a safety classifier on violent\n",
      "Document 3526:\n",
      "content, Ziegler et al.\n",
      "Document 3527:\n",
      "(2022) created a tool to assist human\n",
      "Document 3528:\n",
      "adversaries to find and eliminate failures in a classifier\n",
      "Document 3529:\n",
      "faster and more effectively. Tool-assisted\n",
      "Document 3530:\n",
      "rewrites are faster than pure manual\n",
      "Document 3531:\n",
      "rewrites, reducing 20 min down to 13\n",
      "Document 3532:\n",
      "min per example.\n",
      "Document 3533:\n",
      "Precisely, they introduced two features to\n",
      "Document 3534:\n",
      "assist human writers:\n",
      "Document 3535:\n",
      "Feature 1: Display of saliency score of\n",
      "Document 3536:\n",
      "each token. The tool interface highlights the tokens most\n",
      "Document 3537:\n",
      "likely to affect the classifier’s output\n",
      "Document 3538:\n",
      "upon removal. The saliency score for a token\n",
      "Document 3539:\n",
      "was the magnitude of the gradient of the\n",
      "Document 3540:\n",
      "classifier’s output with respect to the\n",
      "Document 3541:\n",
      "token’s embedding, same as in\n",
      "Document 3542:\n",
      "Wallace et al. (2019)\n",
      "Document 3543:\n",
      "Feature 2: Token substitution and insertion. This\n",
      "Document 3544:\n",
      "feature makes the token manipulation operation via\n",
      "Document 3545:\n",
      "BERT-Attack easily accessible. The token updates\n",
      "Document 3546:\n",
      "then get reviewed by human writers. Once a token\n",
      "Document 3547:\n",
      "in the snippet is clicked, a dropdown shows\n",
      "Document 3548:\n",
      "up with a list of new tokens sorted by how\n",
      "Document 3549:\n",
      "much they reduce the current model score.\n",
      "Document 3550:\n",
      "Fig. 13. UI for humans to do\n",
      "Document 3551:\n",
      "tool-assisted adversarial attack on a\n",
      "Document 3552:\n",
      "classifier. Humans are asked to edit the prompt\n",
      "Document 3553:\n",
      "or completion to lower the model prediction probabilities of whether\n",
      "Document 3554:\n",
      "the inputs are violent content. (Image source:\n",
      "Document 3555:\n",
      "Ziegler et al. 2022)\n",
      "Document 3556:\n",
      "Bot-Adversarial Dialogue\n",
      "Document 3557:\n",
      "(BAD; Xu et al. 2021)\n",
      "Document 3558:\n",
      "proposed a framework where humans are guided to trick model\n",
      "Document 3559:\n",
      "to make mistakes (e.g. output unsafe\n",
      "Document 3560:\n",
      "content). They collected 5000+ conversations between the model\n",
      "Document 3561:\n",
      "and crowdworkers. Each conversation consists of 14 turns\n",
      "Document 3562:\n",
      "and the model is scored based on the number of\n",
      "Document 3563:\n",
      "unsafe turns. Their work resulted in a BAD dataset\n",
      "Document 3564:\n",
      "(Tensorflow dataset), containing ~2500\n",
      "Document 3565:\n",
      "dialogues labeled with offensiveness. The\n",
      "Document 3566:\n",
      "red-teaming dataset from Anthropic contains close\n",
      "Document 3567:\n",
      "to 40k adversarial attacks, collected from human\n",
      "Document 3568:\n",
      "red teamers having conversations with LLMs\n",
      "Document 3569:\n",
      "(Ganguli, et al. 2022).\n",
      "Document 3570:\n",
      "They found RLHF models are harder to be attacked\n",
      "Document 3571:\n",
      "as they scale up. Human expert\n",
      "Document 3572:\n",
      "red-teaming is commonly used for all safety\n",
      "Document 3573:\n",
      "preparedness work for big model releases at\n",
      "Document 3574:\n",
      "OpenAI, such as GPT-4 and\n",
      "Document 3575:\n",
      "DALL-E 3.\n",
      "Document 3576:\n",
      "Model Red-teaming#\n",
      "Document 3577:\n",
      "Human red-teaming is powerful but hard\n",
      "Document 3578:\n",
      "to scale and may demand lots of training and special\n",
      "Document 3579:\n",
      "expertise. Now let’s imagine that we\n",
      "Document 3580:\n",
      "can learn a red-teamer model\n",
      "Document 3581:\n",
      "$p_\\text{red}$ to\n",
      "Document 3582:\n",
      "play adversarially against a target LLM\n",
      "Document 3583:\n",
      "$p$ to trigger unsafe responses. The main\n",
      "Document 3584:\n",
      "challenge in model-based red-teaming is\n",
      "Document 3585:\n",
      "how to judge when an attack is successful such that\n",
      "Document 3586:\n",
      "we can construct a proper learning signal to train the\n",
      "Document 3587:\n",
      "red-teamer model.\n",
      "Document 3588:\n",
      "Assuming we have a good quality classifier to\n",
      "Document 3589:\n",
      "judge whether model output is harmful, we can use\n",
      "Document 3590:\n",
      "it as the reward and train the\n",
      "Document 3591:\n",
      "red-teamer model to produce some inputs that\n",
      "Document 3592:\n",
      "can maximize the classifier score on the target model\n",
      "Document 3593:\n",
      "output (Perez et al. 2022).\n",
      "Document 3594:\n",
      "Let $r(\\mathbf{x},\n",
      "Document 3595:\n",
      "\\mathbf{y})$ be such a\n",
      "Document 3596:\n",
      "red team classifier, which can judge whether output\n",
      "Document 3597:\n",
      "$\\mathbf{y}$  is harmful\n",
      "Document 3598:\n",
      "given a test input\n",
      "Document 3599:\n",
      "$\\mathbf{x}$. Finding adversarial\n",
      "Document 3600:\n",
      "attack examples follows a simple three-step process:\n",
      "Document 3601:\n",
      "Sample test inputs from a red-teamer\n",
      "Document 3602:\n",
      "LLM $\\mathbf{x} \\sim\n",
      "Document 3603:\n",
      "p_\\text{r\n",
      "Document 3604:\n",
      "ed}(.)$.\n",
      "Document 3605:\n",
      "Use the target LLM\n",
      "Document 3606:\n",
      "$p(\\mathbf{y} \\mid\n",
      "Document 3607:\n",
      "\\mathbf{x})$ to generate an\n",
      "Document 3608:\n",
      "output $\\mathbf{y}$ for each\n",
      "Document 3609:\n",
      "test case $\\mathbf{x}$.\n",
      "Document 3610:\n",
      "Identify a subset of test cases leading to\n",
      "Document 3611:\n",
      "harmful output according to the classifier\n",
      "Document 3612:\n",
      "$r(\\mathbf{x},\n",
      "Document 3613:\n",
      "\\mathbf{y})$.\n",
      "Document 3614:\n",
      "They experimented with several ways for sampling from the\n",
      "Document 3615:\n",
      "red team model or further training the red team model\n",
      "Document 3616:\n",
      "to be more effective,\n",
      "Document 3617:\n",
      "Zero-shot generation: This is to find\n",
      "Document 3618:\n",
      "a number of prompts that can trigger harmful output conditioned\n",
      "Document 3619:\n",
      "on a preset prompt.\n",
      "Document 3620:\n",
      "Stochastic few-shot generation: The\n",
      "Document 3621:\n",
      "red team prompts found from the above step are then\n",
      "Document 3622:\n",
      "used as few-shot examples to generate more similar\n",
      "Document 3623:\n",
      "cases. Each zero-shot test case might be\n",
      "Document 3624:\n",
      "selected in few-shot examples with a probability\n",
      "Document 3625:\n",
      "$\\propto\n",
      "Document 3626:\n",
      "\\exp(r(\\m\n",
      "Document 3627:\n",
      "athbf{x},\n",
      "Document 3628:\n",
      "\\mathbf{y}) /\n",
      "Document 3629:\n",
      "\\tau)$\n",
      "Document 3630:\n",
      "Supervised learning: The red team model can\n",
      "Document 3631:\n",
      "be fine-tuned on failing,\n",
      "Document 3632:\n",
      "zero-shot test cases. The training only runs\n",
      "Document 3633:\n",
      "lightly for one epoch to avoid overfitting and preserve\n",
      "Document 3634:\n",
      "sample diversity.\n",
      "Document 3635:\n",
      "Reinforcement learning: Because the sampling steps\n",
      "Document 3636:\n",
      "are non-differentiable, a standard RL\n",
      "Document 3637:\n",
      "fine-tuning is needed to maximize the reward\n",
      "Document 3638:\n",
      "$\\mathbb{\n",
      "Document 3639:\n",
      "E}_{\\mathb\n",
      "Document 3640:\n",
      "f{x}\n",
      "Document 3641:\n",
      "\\sim\n",
      "Document 3642:\n",
      "p_\\text{r\n",
      "Document 3643:\n",
      "ed}(.)}\n",
      "Document 3644:\n",
      "[r(\\mathbf{x},\n",
      "Document 3645:\n",
      "\\mathbf{y})]$, with a\n",
      "Document 3646:\n",
      "KL divergence term between current\n",
      "Document 3647:\n",
      "$p_\\text{red}$ and\n",
      "Document 3648:\n",
      "the initial model behavior, where\n",
      "Document 3649:\n",
      "$\\mathbf{y}$ is a sample\n",
      "Document 3650:\n",
      "from the target model,\n",
      "Document 3651:\n",
      "$\\mathbf{y} \\sim\n",
      "Document 3652:\n",
      "p(. \\mid\n",
      "Document 3653:\n",
      "\\mathbf{x})$. The paper\n",
      "Document 3654:\n",
      "warm-started the model from the supervised\n",
      "Document 3655:\n",
      "fine-tuned version and applied A2C\n",
      "Document 3656:\n",
      "RL algorithm.\n",
      "Document 3657:\n",
      "The experiment used the BAD (Bot\n",
      "Document 3658:\n",
      "Adversarial Dialogue) dataset and generated red team\n",
      "Document 3659:\n",
      "test cases using a simple prompt:\n",
      "Document 3660:\n",
      "List of questions to ask someone:\n",
      "Document 3661:\n",
      "1.\n",
      "Document 3662:\n",
      "For attacks to trigger private personal information, this\n",
      "Document 3663:\n",
      "zero-shot prompt is used:\n",
      "Document 3664:\n",
      "List of questions that inquire about a person's\n",
      "Document 3665:\n",
      "{phone number|home address|Social Security\n",
      "Document 3666:\n",
      "Number|email address}:\n",
      "Document 3667:\n",
      "1.\n",
      "Document 3668:\n",
      "To encourage high-quality and diverse samples,\n",
      "Document 3669:\n",
      "they adopted nucleus sampling with\n",
      "Document 3670:\n",
      "$p=0.95$. The diversity is\n",
      "Document 3671:\n",
      "measured as self-BLEU, that is,\n",
      "Document 3672:\n",
      "precisely, the maximum BLEU of a given\n",
      "Document 3673:\n",
      "case against 1000 cases. Lower self-BLEU\n",
      "Document 3674:\n",
      "indicates better diversity. There is a clear tradeoff\n",
      "Document 3675:\n",
      "between sample diversity and attack success rate.\n",
      "Document 3676:\n",
      "Zero-shot generation has least success rate in term\n",
      "Document 3677:\n",
      "of tricking offensive model outputs but preserves sampling diversity\n",
      "Document 3678:\n",
      "well, while with low KL penalty, RL\n",
      "Document 3679:\n",
      "fine-tuning maximizes reward effectively but at\n",
      "Document 3680:\n",
      "the cost of diversity, exploiting one successful attack\n",
      "Document 3681:\n",
      "patterns.\n",
      "Document 3682:\n",
      "Fig. 14. The x-axis measures\n",
      "Document 3683:\n",
      "the % model responses are classified as offensive (=\n",
      "Document 3684:\n",
      "\"attack success rate\") and the y-axis\n",
      "Document 3685:\n",
      "measures sample diversity by self-BLEU.\n",
      "Document 3686:\n",
      "Displayed red team generation methods are zero-shot\n",
      "Document 3687:\n",
      "(ZS), stochastic few-shot\n",
      "Document 3688:\n",
      "(SFS), supervised learning (SL), BAD\n",
      "Document 3689:\n",
      "dataset, RL (A2C with different KL\n",
      "Document 3690:\n",
      "penalties). Each node is colored based % test prompts\n",
      "Document 3691:\n",
      "classified as offensive, where blue is low and red\n",
      "Document 3692:\n",
      "is high. (Image source: Perez et\n",
      "Document 3693:\n",
      "al. 2022)\n",
      "Document 3694:\n",
      "It is impossible to build a perfect classifier\n",
      "Document 3695:\n",
      "on detecting harmful content and any biases or flaw within\n",
      "Document 3696:\n",
      "this classifier can lead to biased attacks. It\n",
      "Document 3697:\n",
      "is especially easy for RL algorithm to exploit any small\n",
      "Document 3698:\n",
      "issues with the classifier as an effective attack\n",
      "Document 3699:\n",
      "pattern, which may end up just being an attack\n",
      "Document 3700:\n",
      "on the classifier. In addition, someone argues\n",
      "Document 3701:\n",
      "that red-teaming against an existing classifier\n",
      "Document 3702:\n",
      "has marginal benefits because such a classifier can be\n",
      "Document 3703:\n",
      "used directly to filter training data or block model\n",
      "Document 3704:\n",
      "output.\n",
      "Document 3705:\n",
      "Casper et al. (2023)\n",
      "Document 3706:\n",
      "set up a human-in-the-loop\n",
      "Document 3707:\n",
      "red teaming process. The main difference from Perez\n",
      "Document 3708:\n",
      "et al. (2022) is that they\n",
      "Document 3709:\n",
      "explicitly set up a data sampling stage for the target\n",
      "Document 3710:\n",
      "model such that we can collect human labels on them\n",
      "Document 3711:\n",
      "to train a task-specific red team\n",
      "Document 3712:\n",
      "classifier. There are three steps:\n",
      "Document 3713:\n",
      "Explore: Sample from the model and examine the\n",
      "Document 3714:\n",
      "outputs. Embedding based clustering is applied\n",
      "Document 3715:\n",
      "to downsample with enough diversity.\n",
      "Document 3716:\n",
      "Establish: Humans judge the model outputs as\n",
      "Document 3717:\n",
      "good vs bad. Then a harmfulness classifier\n",
      "Document 3718:\n",
      "is trained with human labels.\n",
      "Document 3719:\n",
      "On the dishonesty experiment, the paper compared\n",
      "Document 3720:\n",
      "human labels with\n",
      "Document 3721:\n",
      "GPT-3.5-turbo\n",
      "Document 3722:\n",
      "labels. Although they disagreed on almost half of\n",
      "Document 3723:\n",
      "examples, classifiers trained with\n",
      "Document 3724:\n",
      "GPT-3.5-turbo or\n",
      "Document 3725:\n",
      "human labels achieved comparable accuracy. Using models to replace\n",
      "Document 3726:\n",
      "human annotators is quite feasible; See similar claims\n",
      "Document 3727:\n",
      "here, here and here.\n",
      "Document 3728:\n",
      "Exploit: The last step is to use\n",
      "Document 3729:\n",
      "RL to train an adversarial prompt generator to trigger\n",
      "Document 3730:\n",
      "a diverse distribution of harmful outputs. The reward combines\n",
      "Document 3731:\n",
      "the harmfulness classifier score with a diversity constraint\n",
      "Document 3732:\n",
      "measured as intra-batch cosine distance of the\n",
      "Document 3733:\n",
      "target LM’s embeddings. The\n",
      "Document 3734:\n",
      "diversity term is to avoid mode collapse and removing this\n",
      "Document 3735:\n",
      "term in the RL loss leads to complete failure,\n",
      "Document 3736:\n",
      "generating nonsensical prompts.\n",
      "Document 3737:\n",
      "Fig. 15. The pipeline of\n",
      "Document 3738:\n",
      "red-teaming via\n",
      "Document 3739:\n",
      "Explore-Establish-Exploit steps.\n",
      "Document 3740:\n",
      "(Image source: Casper et al.\n",
      "Document 3741:\n",
      "2023)\n",
      "Document 3742:\n",
      "FLIRT (“Feedback Loop\n",
      "Document 3743:\n",
      "In-context Red Teaming”;\n",
      "Document 3744:\n",
      "Mehrabi et al. 2023) relies\n",
      "Document 3745:\n",
      "on in-context learning of a red LM\n",
      "Document 3746:\n",
      "$p_\\text{red}$ to\n",
      "Document 3747:\n",
      "attack an image or text generative model\n",
      "Document 3748:\n",
      "$p$ to output unsafe content. Recall that\n",
      "Document 3749:\n",
      "zero-shot prompting was experimented as one way to\n",
      "Document 3750:\n",
      "generate red-teaming attacks in Perez et\n",
      "Document 3751:\n",
      "al. 2022.\n",
      "Document 3752:\n",
      "In each FLIRT iteration,\n",
      "Document 3753:\n",
      "The red LM\n",
      "Document 3754:\n",
      "$p_\\text{red}$ generates\n",
      "Document 3755:\n",
      "an adversarial prompt $\\mathbf{x}\n",
      "Document 3756:\n",
      "\\sim\n",
      "Document 3757:\n",
      "p_\\text{red}(.\n",
      "Document 3758:\n",
      "\\mid\n",
      "Document 3759:\n",
      "{\\small{\\\n",
      "Document 3760:\n",
      "text{examp\n",
      "Document 3761:\n",
      "les}}})$;\n",
      "Document 3762:\n",
      "The initial in-context examples are handcrafted by\n",
      "Document 3763:\n",
      "human;\n",
      "Document 3764:\n",
      "The generative model $p$ generates an\n",
      "Document 3765:\n",
      "image or a text output\n",
      "Document 3766:\n",
      "$\\mathbf{y}$ conditioned on this\n",
      "Document 3767:\n",
      "prompt $\\mathbf{y} \\sim\n",
      "Document 3768:\n",
      "p(.\\mid\n",
      "Document 3769:\n",
      "\\mathbf{x})$;\n",
      "Document 3770:\n",
      "The generated content\n",
      "Document 3771:\n",
      "$\\mathbf{y}$ is evaluated whether\n",
      "Document 3772:\n",
      "it is safety using e.g.\n",
      "Document 3773:\n",
      "classifiers;\n",
      "Document 3774:\n",
      "If it is deemed unsafe, the trigger prompt\n",
      "Document 3775:\n",
      "$\\mathbf{x}$ is used to\n",
      "Document 3776:\n",
      "update in-context exemplars for\n",
      "Document 3777:\n",
      "$p_\\text{red}$ to\n",
      "Document 3778:\n",
      "generate new adversarial prompts according to a strategy.\n",
      "Document 3779:\n",
      "There are a couple strategies for how to update\n",
      "Document 3780:\n",
      "in-context examplars in FLIRT:\n",
      "Document 3781:\n",
      "FIFO: Can replace the seed\n",
      "Document 3782:\n",
      "hand-curated examples, and thus the generation\n",
      "Document 3783:\n",
      "can diverge.\n",
      "Document 3784:\n",
      "LIFO: Never replace the seed set\n",
      "Document 3785:\n",
      "of examples and only the last one gets replaced with\n",
      "Document 3786:\n",
      "the latest successful attacks. But quite limited in terms\n",
      "Document 3787:\n",
      "of diversity and attack effectiveness.\n",
      "Document 3788:\n",
      "Scoring: Essentially this is a priority queue\n",
      "Document 3789:\n",
      "where examples are ranked by scores. Good attacks are\n",
      "Document 3790:\n",
      "expected to optimize effectiveness (maximize the unsafe\n",
      "Document 3791:\n",
      "generations), diversity (semantically diverse prompts) and\n",
      "Document 3792:\n",
      "low-toxicity (meaning that the text prompt\n",
      "Document 3793:\n",
      "can trick text toxicity classifier).\n",
      "Document 3794:\n",
      "Effectiveness is measured by attack objective functions designed\n",
      "Document 3795:\n",
      "for different experiments:\n",
      "Document 3796:\n",
      "- In text-to-image experiment,\n",
      "Document 3797:\n",
      "they used Q16 (Schramowski et\n",
      "Document 3798:\n",
      "al. 2022) and NudeNet\n",
      "Document 3799:\n",
      "(https://\n",
      "Document 3800:\n",
      "github.com\n",
      "Document 3801:\n",
      "/notAI-tec\n",
      "Document 3802:\n",
      "h/NudeNet)\n",
      "Document 3803:\n",
      ".\n",
      "Document 3804:\n",
      "- text-to-text experiment:\n",
      "Document 3805:\n",
      "TOXIGEN\n",
      "Document 3806:\n",
      "Diversity is measured by pairwise\n",
      "Document 3807:\n",
      "dissimilarity, in form of\n",
      "Document 3808:\n",
      "$\\sum_{(\\\n",
      "Document 3809:\n",
      "mathbf{x}_\n",
      "Document 3810:\n",
      "i,\n",
      "Document 3811:\n",
      "\\mathbf{x}_j)\n",
      "Document 3812:\n",
      "\\in \\text{All pairs}} [1\n",
      "Document 3813:\n",
      "-\n",
      "Document 3814:\n",
      "\\text{sim\n",
      "Document 3815:\n",
      "}(\\mathbf{\n",
      "Document 3816:\n",
      "x}_i,\n",
      "Document 3817:\n",
      "\\mathbf{x\n",
      "Document 3818:\n",
      "}_j)]$\n",
      "Document 3819:\n",
      "Low-toxicity is measured by Perspective\n",
      "Document 3820:\n",
      "API.\n",
      "Document 3821:\n",
      "Scoring-LIFO: Combine\n",
      "Document 3822:\n",
      "LIFO and Scoring strategies and force to\n",
      "Document 3823:\n",
      "update the last entry if the queue\n",
      "Document 3824:\n",
      "hasn’t been updated for a long\n",
      "Document 3825:\n",
      "time.\n",
      "Document 3826:\n",
      "Fig. 16. Attack effectiveness (% of generated\n",
      "Document 3827:\n",
      "prompts that trigger unsafe generations) of different attack strategies\n",
      "Document 3828:\n",
      "on different diffusion models. SFS\n",
      "Document 3829:\n",
      "(stochastic few-shot) is set\n",
      "Document 3830:\n",
      "as a baseline. Numbers in parentheses are % of\n",
      "Document 3831:\n",
      "unique prompts. (Image source: Mehrabi\n",
      "Document 3832:\n",
      "et al. 2023)\n",
      "Document 3833:\n",
      "Peek into Mitigation#\n",
      "Document 3834:\n",
      "Saddle Point Problem#\n",
      "Document 3835:\n",
      "A nice framework of adversarial robustness is\n",
      "Document 3836:\n",
      "to model it as a saddle point problem in the\n",
      "Document 3837:\n",
      "lens of robust optimization (Madry et al.\n",
      "Document 3838:\n",
      "2017 ). The framework is proposed for continuous inputs on\n",
      "Document 3839:\n",
      "classification tasks, but it is quite a neat mathematical\n",
      "Document 3840:\n",
      "formulation of a bi-level optimization process and thus\n",
      "Document 3841:\n",
      "I find it worthy of sharing here.\n",
      "Document 3842:\n",
      "Let’s consider a classification task on\n",
      "Document 3843:\n",
      "a data distribution over pairs of (sample,\n",
      "Document 3844:\n",
      "label), $(\\mathbf{x},\n",
      "Document 3845:\n",
      "y) \\in\n",
      "Document 3846:\n",
      "\\mathcal{D}$ , the objective\n",
      "Document 3847:\n",
      "of training a robust classifier refers to a saddle\n",
      "Document 3848:\n",
      "point problem:\n",
      "Document 3849:\n",
      "$$\n",
      "Document 3850:\n",
      "\\min_\\theta\n",
      "Document 3851:\n",
      "\\mathbb{E\n",
      "Document 3852:\n",
      "}_{(\\mathb\n",
      "Document 3853:\n",
      "f{x},\n",
      "Document 3854:\n",
      "y) \\sim \\mathcal{D}}\n",
      "Document 3855:\n",
      "[\\max_{\\b\n",
      "Document 3856:\n",
      "oldsymbol{\n",
      "Document 3857:\n",
      "\\delta}\n",
      "Document 3858:\n",
      "\\sim \\mathcal{S}}\n",
      "Document 3859:\n",
      "\\mathcal{\n",
      "Document 3860:\n",
      "L}(\\mathbf\n",
      "Document 3861:\n",
      "{x}\n",
      "Document 3862:\n",
      "+ \\boldsymbol{\\delta},\n",
      "Document 3863:\n",
      "y;\\theta)]\n",
      "Document 3864:\n",
      "$$\n",
      "Document 3865:\n",
      "where $\\mathcal{S}\n",
      "Document 3866:\n",
      "\\subseteq\n",
      "Document 3867:\n",
      "\\mathbb{R}^d$ refers\n",
      "Document 3868:\n",
      "to a set of allowed perturbation for the\n",
      "Document 3869:\n",
      "adversary; E.g. we would like to\n",
      "Document 3870:\n",
      "see an adversarial version of an image still looks\n",
      "Document 3871:\n",
      "similar to the original version.\n",
      "Document 3872:\n",
      "The objective is composed of an inner maximization\n",
      "Document 3873:\n",
      "problem and an outer minimization problem:\n",
      "Document 3874:\n",
      "Inner maximization: find the most effective\n",
      "Document 3875:\n",
      "adversarial data point,\n",
      "Document 3876:\n",
      "$\\mathbf{x} +\n",
      "Document 3877:\n",
      "\\boldsymbol{\\delta}$, that\n",
      "Document 3878:\n",
      "leads to high loss. All the adversarial attack\n",
      "Document 3879:\n",
      "methods eventually come down to ways to maximize the loss\n",
      "Document 3880:\n",
      "in the inner loop.\n",
      "Document 3881:\n",
      "Outer minimization: find the best model\n",
      "Document 3882:\n",
      "parameterization such that the loss with the most effective\n",
      "Document 3883:\n",
      "attacks triggered from the inner maximization process is\n",
      "Document 3884:\n",
      "minimized. Naive way to train a robust model\n",
      "Document 3885:\n",
      "is to replace each data point with their perturbed\n",
      "Document 3886:\n",
      "versions, which can be multiple adversarial variants of\n",
      "Document 3887:\n",
      "one data point.\n",
      "Document 3888:\n",
      "Fig. 17. They also found that\n",
      "Document 3889:\n",
      "robustness to adversaries demands larger model capacity, because\n",
      "Document 3890:\n",
      "it makes the decision boundary more complicated. Interesting,\n",
      "Document 3891:\n",
      "larger capacity alone , without data augmentation, helps\n",
      "Document 3892:\n",
      "increase model robustness. (Image source:\n",
      "Document 3893:\n",
      "Madry et al. 2017)\n",
      "Document 3894:\n",
      "Some work on LLM Robustness#\n",
      "Document 3895:\n",
      "Disclaimer: Not trying to be comprehensive here.\n",
      "Document 3896:\n",
      "Need a separate blog post to go deeper.)\n",
      "Document 3897:\n",
      "One simple and intuitive way to defend the model\n",
      "Document 3898:\n",
      "against adversarial attacks is to explicitly instruct model to\n",
      "Document 3899:\n",
      "be responsible, not generating harmful content (Xie\n",
      "Document 3900:\n",
      "et al. 2023). It can largely reduce\n",
      "Document 3901:\n",
      "the success rate of jailbreak attacks, but has\n",
      "Document 3902:\n",
      "side effects for general model quality due to the model\n",
      "Document 3903:\n",
      "acting more conservatively (e.g. for\n",
      "Document 3904:\n",
      "creative writing) or incorrectly interpreting the instruction under some\n",
      "Document 3905:\n",
      "scenarios (e.g. safe-unsafe\n",
      "Document 3906:\n",
      "classification).\n",
      "Document 3907:\n",
      "The most common way to mitigate risks of\n",
      "Document 3908:\n",
      "adversarial attacks is to train the model on those\n",
      "Document 3909:\n",
      "attack samples, known as adversarial training. It\n",
      "Document 3910:\n",
      "is considered as the strongest defense but leading to\n",
      "Document 3911:\n",
      "tradeoff between robustness and model performance. In\n",
      "Document 3912:\n",
      "an experiment by Jain et al.\n",
      "Document 3913:\n",
      "2023, they tested two adversarial training\n",
      "Document 3914:\n",
      "setups: (1) run gradient descent on harmful\n",
      "Document 3915:\n",
      "prompts paired with \"I'm sorry. As a\n",
      "Document 3916:\n",
      "...\" response; (2) run one descent step\n",
      "Document 3917:\n",
      "on a refusal response and an ascend step on a\n",
      "Document 3918:\n",
      "red-team bad response per training step. The\n",
      "Document 3919:\n",
      "method (2) ends up being quite useless because\n",
      "Document 3920:\n",
      "the model generation quality degrades a lot, while\n",
      "Document 3921:\n",
      "the drop in attack success rate is tiny.\n",
      "Document 3922:\n",
      "White-box attacks often lead to nonsensical\n",
      "Document 3923:\n",
      "adversarial prompts and thus they can be detected by\n",
      "Document 3924:\n",
      "examining perplexity. Of course, a\n",
      "Document 3925:\n",
      "white-box attack can directly bypass this by explicitly\n",
      "Document 3926:\n",
      "optimizing for lower perplexity, such as\n",
      "Document 3927:\n",
      "UAT-LM, a variation of\n",
      "Document 3928:\n",
      "UAT. However, there is a tradeoff\n",
      "Document 3929:\n",
      "and it can lead to lower attack success rate.\n",
      "Document 3930:\n",
      "Fig. 18. Perplexity filter can\n",
      "Document 3931:\n",
      "block attacks by [Zou et al.\n",
      "Document 3932:\n",
      "(2023)](h\n",
      "Document 3933:\n",
      "ttps://arx\n",
      "Document 3934:\n",
      "iv.org/abs\n",
      "Document 3935:\n",
      "/2307.1504\n",
      "Document 3936:\n",
      "3).\n",
      "Document 3937:\n",
      "\"PPL Passed\" and \"PPL Window\n",
      "Document 3938:\n",
      "Passed\" are the rates at which harmful prompts with\n",
      "Document 3939:\n",
      "an adversarial suffix bypass the filter without detection.\n",
      "Document 3940:\n",
      "The lower the pass rate the better the filter\n",
      "Document 3941:\n",
      "is. (Image source: Jain et\n",
      "Document 3942:\n",
      "al. 2023)\n",
      "Document 3943:\n",
      "Jain et al. 2023 also tested\n",
      "Document 3944:\n",
      "methods of preprocessing text inputs to remove adversarial\n",
      "Document 3945:\n",
      "modifications while semantic meaning remains.\n",
      "Document 3946:\n",
      "Paraphrase: Use LLM to\n",
      "Document 3947:\n",
      "paraphrase input text, which can may cause small\n",
      "Document 3948:\n",
      "impacts on downstream task performance.\n",
      "Document 3949:\n",
      "Retokenization: Breaks tokens apart and\n",
      "Document 3950:\n",
      "represent them with multiple smaller tokens, via,\n",
      "Document 3951:\n",
      "e.g. BPE-dropout\n",
      "Document 3952:\n",
      "(drop random p% tokens). The hypothesis is\n",
      "Document 3953:\n",
      "that adversarial prompts are likely to exploit specific\n",
      "Document 3954:\n",
      "adversarial combinations of tokens. This does help degrade\n",
      "Document 3955:\n",
      "the attack success rate but is limited,\n",
      "Document 3956:\n",
      "e.g. 90+% down to\n",
      "Document 3957:\n",
      "40%.\n",
      "Document 3958:\n",
      "Citation#\n",
      "Cited as:\n",
      "Document 3959:\n",
      "Weng, Lilian. (Oct\n",
      "Document 3960:\n",
      "2023). “Adversarial Attacks on\n",
      "Document 3961:\n",
      "LLMs”. Lil’Log.\n",
      "Document 3962:\n",
      "https://l\n",
      "Document 3963:\n",
      "ilianweng.\n",
      "Document 3964:\n",
      "github.io/\n",
      "Document 3965:\n",
      "posts/2023\n",
      "Document 3966:\n",
      "-10-25-adv\n",
      "Document 3967:\n",
      "-attack-ll\n",
      "Document 3968:\n",
      "m/.\n",
      "Document 3969:\n",
      "Or\n",
      "Document 3970:\n",
      "@article{\n",
      "Document 3971:\n",
      "weng2023at\n",
      "Document 3972:\n",
      "tack,\n",
      "Document 3973:\n",
      "title   = \"Adversarial\n",
      "Document 3974:\n",
      "Attacks on LLMs\",\n",
      "Document 3975:\n",
      "author  = \"Weng,\n",
      "Document 3976:\n",
      "Lilian\",\n",
      "Document 3977:\n",
      "journal =\n",
      "Document 3978:\n",
      "\"lilianwe\n",
      "Document 3979:\n",
      "ng.github.\n",
      "Document 3980:\n",
      "io\",\n",
      "Document 3981:\n",
      "year    =\n",
      "Document 3982:\n",
      "\"2023\",\n",
      "Document 3983:\n",
      "month   = \"Oct\",\n",
      "Document 3984:\n",
      "url     =\n",
      "Document 3985:\n",
      "\"https://\n",
      "Document 3986:\n",
      "lilianweng\n",
      "Document 3987:\n",
      ".github.io\n",
      "Document 3988:\n",
      "/posts/202\n",
      "Document 3989:\n",
      "3-10-25-ad\n",
      "Document 3990:\n",
      "v-attack-l\n",
      "Document 3991:\n",
      "lm/\"\n",
      "Document 3992:\n",
      "}\n",
      "References#\n",
      "Document 3993:\n",
      "[1] Madry et al.\n",
      "Document 3994:\n",
      "“Towards Deep Learning Models Resistant\n",
      "Document 3995:\n",
      "to Adversarial Attacks”.\n",
      "Document 3996:\n",
      "ICLR 2018.\n",
      "Document 3997:\n",
      "[2] Ribeiro et al.\n",
      "Document 3998:\n",
      "“Semantically equivalent adversarial rules for debugging\n",
      "Document 3999:\n",
      "NLP models”. ACL 2018.\n",
      "Document 4000:\n",
      "[3] Guo et al.\n",
      "Document 4001:\n",
      "“Gradient-based adversarial attacks against\n",
      "Document 4002:\n",
      "text transformers”. arXiv\n",
      "Document 4003:\n",
      "preprint\n",
      "Document 4004:\n",
      "arXiv:2104.13733\n",
      "Document 4005:\n",
      "(2021).\n",
      "Document 4006:\n",
      "[4] Ebrahimi et al.\n",
      "Document 4007:\n",
      "“HotFlip: White-Box\n",
      "Document 4008:\n",
      "Adversarial Examples for Text Classification”.\n",
      "Document 4009:\n",
      "ACL 2018.\n",
      "Document 4010:\n",
      "[5] Wallace et al.\n",
      "Document 4011:\n",
      "“Universal Adversarial Triggers for\n",
      "Document 4012:\n",
      "Attacking and Analyzing NLP.”\n",
      "Document 4013:\n",
      "EMNLP-IJCNLP 2019. |\n",
      "Document 4014:\n",
      "code\n",
      "Document 4015:\n",
      "[6] Mehrabi et al.\n",
      "Document 4016:\n",
      "“Robust Conversational Agents against\n",
      "Document 4017:\n",
      "Imperceptible Toxicity\n",
      "Document 4018:\n",
      "Triggers.” NAACL\n",
      "Document 4019:\n",
      "2022.\n",
      "Document 4020:\n",
      "[7] Zou et al.\n",
      "Document 4021:\n",
      "“Universal and Transferable Adversarial Attacks\n",
      "Document 4022:\n",
      "on Aligned Language Models.”\n",
      "Document 4023:\n",
      "arXiv preprint\n",
      "Document 4024:\n",
      "arXiv:2307.15043\n",
      "Document 4025:\n",
      "(2023)\n",
      "Document 4026:\n",
      "[8] Deng et al.\n",
      "Document 4027:\n",
      "“RLPrompt: Optimizing Discrete\n",
      "Document 4028:\n",
      "Text Prompts with Reinforcement Learning.”\n",
      "Document 4029:\n",
      "EMNLP 2022.\n",
      "Document 4030:\n",
      "[9] Jin et al.\n",
      "Document 4031:\n",
      "“Is BERT Really Robust? A\n",
      "Document 4032:\n",
      "Strong Baseline for Natural Language Attack on Text Classification\n",
      "Document 4033:\n",
      "and Entailment.” AAAI\n",
      "Document 4034:\n",
      "2020.\n",
      "Document 4035:\n",
      "[10] Li et al.\n",
      "Document 4036:\n",
      "“BERT-Attack: Adversarial\n",
      "Document 4037:\n",
      "Attack Against BERT Using BERT.”\n",
      "Document 4038:\n",
      "EMNLP 2020.\n",
      "Document 4039:\n",
      "[11] Morris et al.\n",
      "Document 4040:\n",
      "\"TextAttack: A Framework for Adversarial\n",
      "Document 4041:\n",
      "Attacks, Data Augmentation, and Adversarial\n",
      "Document 4042:\n",
      "Training in NLP.\" EMNLP 2020.\n",
      "Document 4043:\n",
      "[12] Xu et al.\n",
      "Document 4044:\n",
      "“Bot-Adversarial Dialogue for Safe\n",
      "Document 4045:\n",
      "Conversational Agents.” NAACL\n",
      "Document 4046:\n",
      "2021.\n",
      "Document 4047:\n",
      "[13] Ziegler et al.\n",
      "Document 4048:\n",
      "“Adversarial training for high-stakes\n",
      "Document 4049:\n",
      "reliability.” NeurIPS 2022.\n",
      "Document 4050:\n",
      "[14] Anthropic, “Red\n",
      "Document 4051:\n",
      "Teaming Language Models to Reduce Harms:\n",
      "Document 4052:\n",
      "Methods, Scaling Behaviors, and Lessons\n",
      "Document 4053:\n",
      "Learned.” arXiv preprint\n",
      "Document 4054:\n",
      "arXiv:2202.03286\n",
      "Document 4055:\n",
      "(2022)\n",
      "Document 4056:\n",
      "[15] Perez et al.\n",
      "Document 4057:\n",
      "“Red Teaming Language Models with Language\n",
      "Document 4058:\n",
      "Models.” arXiv preprint\n",
      "Document 4059:\n",
      "arXiv:2202.03286\n",
      "Document 4060:\n",
      "(2022)\n",
      "Document 4061:\n",
      "[16] Ganguli et al.\n",
      "Document 4062:\n",
      "“Red Teaming Language Models to Reduce\n",
      "Document 4063:\n",
      "Harms: Methods, Scaling\n",
      "Document 4064:\n",
      "Behaviors, and Lessons Learned.”\n",
      "Document 4065:\n",
      "arXiv preprint\n",
      "Document 4066:\n",
      "arXiv:220\n",
      "Document 4067:\n",
      "9.07858\n",
      "Document 4068:\n",
      "(2022)\n",
      "Document 4069:\n",
      "[17] Mehrabi et al.\n",
      "Document 4070:\n",
      "“FLIRT: Feedback Loop In-context\n",
      "Document 4071:\n",
      "Red Teaming.” arXiv\n",
      "Document 4072:\n",
      "preprint\n",
      "Document 4073:\n",
      "arXiv:2308.04265\n",
      "Document 4074:\n",
      "(2023)\n",
      "Document 4075:\n",
      "[18] Casper et al.\n",
      "Document 4076:\n",
      "“Explore, Establish, Exploit:\n",
      "Document 4077:\n",
      "Red Teaming Language Models from\n",
      "Document 4078:\n",
      "Scratch.” arXiv preprint\n",
      "Document 4079:\n",
      "arXiv:2306.09442\n",
      "Document 4080:\n",
      "(2023)\n",
      "Document 4081:\n",
      "[19] Xie et al.\n",
      "Document 4082:\n",
      "“Defending ChatGPT against Jailbreak\n",
      "Document 4083:\n",
      "Attack via Self-Reminder.” Research\n",
      "Document 4084:\n",
      "Square (2023)\n",
      "Document 4085:\n",
      "[20] Jones et al.\n",
      "Document 4086:\n",
      "“Automatically Auditing Large Language Models via\n",
      "Document 4087:\n",
      "Discrete Optimization.” arXiv\n",
      "Document 4088:\n",
      "preprint\n",
      "Document 4089:\n",
      "arXiv:2303.04381\n",
      "Document 4090:\n",
      "(2023)\n",
      "Document 4091:\n",
      "[21] Greshake et al.\n",
      "Document 4092:\n",
      "“Compromising Real-World\n",
      "Document 4093:\n",
      "LLM-Integrated Applications with Indirect Prompt\n",
      "Document 4094:\n",
      "Injection.” arXiv preprint\n",
      "Document 4095:\n",
      "arXiv:230\n",
      "Document 4096:\n",
      "2.12173(20\n",
      "Document 4097:\n",
      "23)\n",
      "Document 4098:\n",
      "[22] Jain et al.\n",
      "Document 4099:\n",
      "“Baseline Defenses for Adversarial\n",
      "Document 4100:\n",
      "Attacks Against Aligned Language Models.”\n",
      "Document 4101:\n",
      "arXiv preprint\n",
      "Document 4102:\n",
      "arXiv:2309.00614\n",
      "Document 4103:\n",
      "(2023)\n",
      "Document 4104:\n",
      "[23] Wei et al.\n",
      "Document 4105:\n",
      "“Jailbroken: How Does LLM\n",
      "Document 4106:\n",
      "Safety Training Fail?” arXiv\n",
      "Document 4107:\n",
      "preprint\n",
      "Document 4108:\n",
      "arXiv:2307.02483\n",
      "Document 4109:\n",
      "(2023)\n",
      "Document 4110:\n",
      "[24] Wei & Zou.\n",
      "Document 4111:\n",
      "“EDA: Easy data augmentation techniques\n",
      "Document 4112:\n",
      "for boosting performance on text classification tasks.”\n",
      "Document 4113:\n",
      "EMNLP-IJCNLP 2019.\n",
      "Document 4114:\n",
      "[25]\n",
      "Document 4115:\n",
      "www.jailbreakchat.com\n",
      "Document 4116:\n",
      "[26] WitchBOT.\n",
      "Document 4117:\n",
      "“You can use GPT-4 to\n",
      "Document 4118:\n",
      "create prompt injections against GPT-4”\n",
      "Document 4119:\n",
      "Apr 2023.\n",
      "Document 4120:\n",
      "nlp\n",
      "language-model\n",
      "safety\n",
      "Document 4121:\n",
      "adversarial attacks\n",
      "robustness\n",
      "Document 4122:\n",
      "redteam\n",
      "Document 4123:\n",
      "«\n",
      "Document 4124:\n",
      "Thinking about High-Quality Human Data\n",
      "Document 4125:\n",
      "»\n",
      "Document 4126:\n",
      "LLM Powered Autonomous Agents\n",
      "Document 4127:\n",
      "© 2024 Lil'Log\n",
      "Document 4128:\n",
      "Powered by\n",
      "Document 4129:\n",
      "Hugo &\n",
      "Document 4130:\n",
      "PaperMod\n"
     ]
    }
   ],
   "source": [
    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
    "from langchain_community.document_loaders import WebBaseLoader\n",
    "from langchain_community.embeddings import GPT4AllEmbeddings\n",
    "from langchain_community.vectorstores import Chroma\n",
    "\n",
    "urls = [\n",
    "    \"https://lilianweng.github.io/posts/2023-06-23-agent/\",\n",
    "    \"https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/\",\n",
    "    \"https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/\",\n",
    "]\n",
    "\n",
    "docs = [WebBaseLoader(url).load() for url in urls]\n",
    "print(len(docs))\n",
    "for i, doc in enumerate(docs):\n",
    "    print(f\"Document {i}[{len(doc)}]:\")\n",
    "    for sentence in doc:\n",
    "        print(dir(sentence))\n",
    "        print(sentence)\n",
    "\n",
    "docs_list = [item for sublist in docs for item in sublist]\n",
    "print(len(docs_list))\n",
    "text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(\n",
    "    chunk_size=250, chunk_overlap=0\n",
    ")\n",
    "# print(text_splitter)\n",
    "doc_splits = text_splitter.split_documents(docs_list)\n",
    "print(len(doc_splits))\n",
    "# print(doc_splits)\n",
    "for i, doc_split in enumerate(doc_splits):\n",
    "    print(f\"Document {i}:\")\n",
    "    print(doc_split.page_content)\n",
    "\n",
    "# Add to vectorDB\n",
    "vectorstore = Chroma.from_documents(\n",
    "    documents=doc_splits,\n",
    "    collection_name=\"rag-chroma\",\n",
    "    embedding=GPT4AllEmbeddings(),\n",
    ")\n",
    "retriever = vectorstore.as_retriever()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\zhaoy\\anaconda3\\Lib\\site-packages\\langchain_core\\_api\\deprecation.py:119: LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 0.3.0. Use invoke instead.\n",
      "  warn_deprecated(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LLM Powered Autonomous Agents | Lil'Log\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Lil'Log\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Posts\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Archive\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Search\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Tags\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "FAQ\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "emojisearch.app\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "      LLM Powered Autonomous Agents\n",
      "    \n",
      "Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n",
      "\n",
      "\n",
      " \n",
      "\n",
      "\n",
      "Table of Contents\n",
      "\n",
      "\n",
      "\n",
      "Agent System Overview\n",
      "\n",
      "Component One: Planning\n",
      "\n",
      "Task Decomposition\n",
      "\n",
      "Self-Reflection\n",
      "\n",
      "\n",
      "Component Two: Memory\n",
      "\n",
      "Types of Memory\n",
      "\n",
      "Maximum Inner Product Search (MIPS)\n",
      "\n",
      "\n",
      "Component Three: Tool Use\n",
      "\n",
      "Case Studies\n",
      "\n",
      "Scientific Discovery Agent\n",
      "\n",
      "Generative Agents Simulation\n",
      "\n",
      "Proof-of-Concept Examples\n",
      "\n",
      "\n",
      "Challenges\n",
      "\n",
      "Citation\n",
      "\n",
      "References\n",
      "{'datasource': 'vectorstore'}\n"
     ]
    }
   ],
   "source": [
    "### Router\n",
    "\n",
    "from langchain.prompts import PromptTemplate\n",
    "from langchain_community.chat_models import ChatOllama\n",
    "from langchain_core.output_parsers import JsonOutputParser\n",
    "\n",
    "# LLM\n",
    "llm = ChatOllama(model=local_llm, format=\"json\", temperature=0)\n",
    "\n",
    "prompt = PromptTemplate(\n",
    "    template=\"\"\"You are an expert at routing a user question to a vectorstore or web search. \\n\n",
    "    Use the vectorstore for questions on LLM  agents, prompt engineering, and adversarial attacks. \\n\n",
    "    You do not need to be stringent with the keywords in the question related to these topics. \\n\n",
    "    Otherwise, use web-search. Give a binary choice 'web_search' or 'vectorstore' based on the question. \\n\n",
    "    Return the a JSON with a single key 'datasource' and no premable or explanation. \\n\n",
    "    Question to route: {question}\"\"\",\n",
    "    input_variables=[\"question\"],\n",
    ")\n",
    "\n",
    "question_router = prompt | llm | JsonOutputParser()\n",
    "question = \"llm agent memory\"\n",
    "docs = retriever.get_relevant_documents(question)\n",
    "doc_txt = docs[1].page_content\n",
    "print(doc_txt)\n",
    "print(question_router.invoke({\"question\": question}))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'score': 'yes'}\n"
     ]
    }
   ],
   "source": [
    "### Retrieval Grader\n",
    "\n",
    "from langchain.prompts import PromptTemplate\n",
    "from langchain_community.chat_models import ChatOllama\n",
    "from langchain_core.output_parsers import JsonOutputParser\n",
    "\n",
    "# LLM\n",
    "llm = ChatOllama(model=local_llm, format=\"json\", temperature=0)\n",
    "\n",
    "prompt = PromptTemplate(\n",
    "    template=\"\"\"You are a grader assessing relevance of a retrieved document to a user question. \\n \n",
    "    Here is the retrieved document: \\n\\n {document} \\n\\n\n",
    "    Here is the user question: {question} \\n\n",
    "    If the document contains keywords related to the user question, grade it as relevant. \\n\n",
    "    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \\n\n",
    "    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \\n\n",
    "    Provide the binary score as a JSON with a single key 'score' and no premable or explanation.\"\"\",\n",
    "    input_variables=[\"question\", \"document\"],\n",
    ")\n",
    "\n",
    "retrieval_grader = prompt | llm | JsonOutputParser()\n",
    "question = \"agent memory\"\n",
    "docs = retriever.get_relevant_documents(question)\n",
    "doc_txt = docs[1].page_content\n",
    "print(retrieval_grader.invoke({\"question\": question, \"document\": doc_txt}))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " In an LLM-powered autonomous agent system, the agent's memory is a key component that functions as a long-term memory module. This memory records a comprehensive list of agents’ experiences in natural language and can be accessed for informing the agent's behavior. The memory stream is essentially an external database used by the agent.\n"
     ]
    }
   ],
   "source": [
    "### Generate\n",
    "\n",
    "from langchain import hub\n",
    "from langchain_community.chat_models import ChatOllama\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "\n",
    "# Prompt\n",
    "prompt = hub.pull(\"rlm/rag-prompt\")\n",
    "\n",
    "# LLM\n",
    "llm = ChatOllama(model=local_llm, temperature=0)\n",
    "\n",
    "\n",
    "# Post-processing\n",
    "def format_docs(docs):\n",
    "    return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
    "\n",
    "\n",
    "# Chain\n",
    "rag_chain = prompt | llm | StrOutputParser()\n",
    "\n",
    "# Run\n",
    "question = \"agent memory\"\n",
    "generation = rag_chain.invoke({\"context\": docs, \"question\": question})\n",
    "print(generation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'score': 'yes'}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "### Hallucination Grader\n",
    "\n",
    "# LLM\n",
    "llm = ChatOllama(model=local_llm, format=\"json\", temperature=0)\n",
    "\n",
    "# Prompt\n",
    "prompt = PromptTemplate(\n",
    "    template=\"\"\"You are a grader assessing whether an answer is grounded in / supported by a set of facts. \\n \n",
    "    Here are the facts:\n",
    "    \\n ------- \\n\n",
    "    {documents} \n",
    "    \\n ------- \\n\n",
    "    Here is the answer: {generation}\n",
    "    Give a binary score 'yes' or 'no' score to indicate whether the answer is grounded in / supported by a set of facts. \\n\n",
    "    Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.\"\"\",\n",
    "    input_variables=[\"generation\", \"documents\"],\n",
    ")\n",
    "\n",
    "hallucination_grader = prompt | llm | JsonOutputParser()\n",
    "hallucination_grader.invoke({\"documents\": docs, \"generation\": generation})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'score': 'yes'}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "### Answer Grader\n",
    "\n",
    "# LLM — rebinds `llm`; the hallucination grader chain built earlier keeps its own\n",
    "# reference to the previous object, so this rebinding does not affect it.\n",
    "llm = ChatOllama(model=local_llm, format=\"json\", temperature=0)\n",
    "\n",
    "# Prompt: requests {'score': 'yes'|'no'} so callers can read score[\"score\"]\n",
    "prompt = PromptTemplate(\n",
    "    template=\"\"\"You are a grader assessing whether an answer is useful to resolve a question. \\n \n",
    "    Here is the answer:\n",
    "    \\n ------- \\n\n",
    "    {generation} \n",
    "    \\n ------- \\n\n",
    "    Here is the question: {question}\n",
    "    Give a binary score 'yes' or 'no' to indicate whether the answer is useful to resolve a question. \\n\n",
    "    Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.\"\"\",\n",
    "    input_variables=[\"generation\", \"question\"],\n",
    ")\n",
    "\n",
    "answer_grader = prompt | llm | JsonOutputParser()\n",
    "# Smoke test — `question` and `generation` are defined in earlier cells\n",
    "answer_grader.invoke({\"question\": question, \"generation\": generation})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\" What is the function of an agent's memory in a given context?\""
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "### Question Re-writer\n",
    "\n",
    "# LLM — plain-text output (no format=\"json\"): the rewriter emits a question string\n",
    "llm = ChatOllama(model=local_llm, temperature=0)\n",
    "\n",
    "# Prompt\n",
    "re_write_prompt = PromptTemplate(\n",
    "    template=\"\"\"You a question re-writer that converts an input question to a better version that is optimized \\n \n",
    "     for vectorstore retrieval. Look at the initial and formulate an improved question. \\n\n",
    "     Here is the initial question: \\n\\n {question}. Improved question with no preamble: \\n \"\"\",\n",
    "    # Fixed: the template only uses {question}; the stray \"generation\" entry\n",
    "    # was never supplied by invoke() below and never referenced by the template.\n",
    "    input_variables=[\"question\"],\n",
    ")\n",
    "\n",
    "question_rewriter = re_write_prompt | llm | StrOutputParser()\n",
    "question_rewriter.invoke({\"question\": question})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Search\n",
    "\n",
    "# from langchain_community.tools.tavily_search import TavilySearchResults\n",
    "from langchain_community.tools import DuckDuckGoSearchResults\n",
    "# web_search_tool = TavilySearchResults(k=3)\n",
    "\n",
    "# Fixed: DuckDuckGoSearchResults caps results via `num_results`, not Tavily's `k`\n",
    "# (the old `k=3` kwarg was silently ignored).\n",
    "web_search_tool = DuckDuckGoSearchResults(num_results=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List\n",
    "\n",
    "from typing_extensions import TypedDict\n",
    "\n",
    "\n",
    "class GraphState(TypedDict):\n",
    "    \"\"\"\n",
    "    Represents the state of our graph.\n",
    "\n",
    "    Attributes:\n",
    "        question: the user question (may be re-phrased by transform_query)\n",
    "        generation: LLM generation\n",
    "        documents: list of documents\n",
    "    \"\"\"\n",
    "\n",
    "    question: str\n",
    "    generation: str\n",
    "    # NOTE(review): at runtime this holds langchain Document objects\n",
    "    # (see the retrieve node below) — the str annotation looks stale; confirm.\n",
    "    documents: List[str]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Nodes\n",
    "\n",
    "from langchain.schema import Document\n",
    "\n",
    "\n",
    "def retrieve(state):\n",
    "    \"\"\"\n",
    "    Retrieve documents\n",
    "\n",
    "    Args:\n",
    "        state (dict): The current graph state\n",
    "\n",
    "    Returns:\n",
    "        state (dict): New key added to state, documents, that contains retrieved documents\n",
    "    \"\"\"\n",
    "    print(\"---RETRIEVE---\")\n",
    "    question = state[\"question\"]\n",
    "\n",
    "    # Retrieval — `retriever` is defined in an earlier cell.\n",
    "    # NOTE(review): get_relevant_documents is deprecated in newer LangChain in\n",
    "    # favor of retriever.invoke(question) — confirm the installed version.\n",
    "    documents = retriever.get_relevant_documents(question)\n",
    "    return {\"documents\": documents, \"question\": question}\n",
    "\n",
    "\n",
    "def generate(state):\n",
    "    \"\"\"\n",
    "    Generate answer\n",
    "\n",
    "    Args:\n",
    "        state (dict): The current graph state\n",
    "\n",
    "    Returns:\n",
    "        state (dict): New key added to state, generation, that contains LLM generation\n",
    "    \"\"\"\n",
    "    print(\"---GENERATE---\")\n",
    "    question = state[\"question\"]\n",
    "    documents = state[\"documents\"]\n",
    "\n",
    "    # RAG generation — `rag_chain` is defined in an earlier cell\n",
    "    generation = rag_chain.invoke({\"context\": documents, \"question\": question})\n",
    "    return {\"documents\": documents, \"question\": question, \"generation\": generation}\n",
    "\n",
    "\n",
    "def grade_documents(state):\n",
    "    \"\"\"\n",
    "    Determines whether the retrieved documents are relevant to the question.\n",
    "\n",
    "    Args:\n",
    "        state (dict): The current graph state\n",
    "\n",
    "    Returns:\n",
    "        state (dict): Updates documents key with only filtered relevant documents\n",
    "    \"\"\"\n",
    "\n",
    "    print(\"---CHECK DOCUMENT RELEVANCE TO QUESTION---\")\n",
    "    question = state[\"question\"]\n",
    "    documents = state[\"documents\"]\n",
    "\n",
    "    # Score each doc; `retrieval_grader` (earlier cell) returns {'score': 'yes'|'no'}\n",
    "    filtered_docs = []\n",
    "    for d in documents:\n",
    "        score = retrieval_grader.invoke(\n",
    "            {\"question\": question, \"document\": d.page_content}\n",
    "        )\n",
    "        grade = score[\"score\"]\n",
    "        if grade == \"yes\":\n",
    "            print(\"---GRADE: DOCUMENT RELEVANT---\")\n",
    "            filtered_docs.append(d)\n",
    "        else:\n",
    "            print(\"---GRADE: DOCUMENT NOT RELEVANT---\")\n",
    "            # `continue` is redundant here (last statement in the loop) but harmless\n",
    "            continue\n",
    "    return {\"documents\": filtered_docs, \"question\": question}\n",
    "\n",
    "\n",
    "def transform_query(state):\n",
    "    \"\"\"\n",
    "    Transform the query to produce a better question.\n",
    "\n",
    "    Args:\n",
    "        state (dict): The current graph state\n",
    "\n",
    "    Returns:\n",
    "        state (dict): Updates question key with a re-phrased question\n",
    "    \"\"\"\n",
    "\n",
    "    print(\"---TRANSFORM QUERY---\")\n",
    "    question = state[\"question\"]\n",
    "    documents = state[\"documents\"]\n",
    "\n",
    "    # Re-write question; `documents` pass through unchanged\n",
    "    better_question = question_rewriter.invoke({\"question\": question})\n",
    "    return {\"documents\": documents, \"question\": better_question}\n",
    "\n",
    "\n",
    "def web_search(state):\n",
    "    \"\"\"\n",
    "    Web search based on the re-phrased question.\n",
    "\n",
    "    Args:\n",
    "        state (dict): The current graph state\n",
    "\n",
    "    Returns:\n",
    "        state (dict): Updates documents key with appended web results\n",
    "    \"\"\"\n",
    "\n",
    "    print(\"---WEB SEARCH---\")\n",
    "    question = state[\"question\"]\n",
    "\n",
    "    # Web search. DuckDuckGoSearchResults returns one formatted string, while\n",
    "    # Tavily returns a list of {\"content\": ...} dicts — normalize both shapes\n",
    "    # (the original code assumed the Tavily shape and crashed on DuckDuckGo).\n",
    "    docs = web_search_tool.invoke({\"query\": question})\n",
    "    if isinstance(docs, str):\n",
    "        web_results = docs\n",
    "    else:\n",
    "        web_results = \"\\n\".join([d[\"content\"] for d in docs])\n",
    "    web_results = Document(page_content=web_results)\n",
    "\n",
    "    # Wrap in a list so documents matches GraphState (a list of Documents).\n",
    "    return {\"documents\": [web_results], \"question\": question}\n",
    "\n",
    "\n",
    "### Edges ###\n",
    "\n",
    "\n",
    "def route_question(state):\n",
    "    \"\"\"\n",
    "    Route question to web search or RAG.\n",
    "\n",
    "    Args:\n",
    "        state (dict): The current graph state\n",
    "\n",
    "    Returns:\n",
    "        str: Next node to call (\"web_search\" or \"vectorstore\")\n",
    "    \"\"\"\n",
    "\n",
    "    print(\"---ROUTE QUESTION---\")\n",
    "    question = state[\"question\"]\n",
    "    print(question)\n",
    "    # `question_router` (earlier cell) returns JSON with a 'datasource' key\n",
    "    source = question_router.invoke({\"question\": question})\n",
    "    print(source)\n",
    "    print(source[\"datasource\"])\n",
    "    if source[\"datasource\"] == \"web_search\":\n",
    "        print(\"---ROUTE QUESTION TO WEB SEARCH---\")\n",
    "        return \"web_search\"\n",
    "    # Default to the vectorstore. The original code returned None on any\n",
    "    # unexpected datasource value, which crashes the conditional entry point.\n",
    "    print(\"---ROUTE QUESTION TO RAG---\")\n",
    "    return \"vectorstore\"\n",
    "\n",
    "\n",
    "def decide_to_generate(state):\n",
    "    \"\"\"\n",
    "    Determines whether to generate an answer, or re-generate a question.\n",
    "\n",
    "    Args:\n",
    "        state (dict): The current graph state\n",
    "\n",
    "    Returns:\n",
    "        str: Binary decision for next node to call (\"transform_query\" or \"generate\")\n",
    "    \"\"\"\n",
    "\n",
    "    print(\"---ASSESS GRADED DOCUMENTS---\")\n",
    "    # (removed a dead bare `state[\"question\"]` expression that had no effect)\n",
    "    filtered_documents = state[\"documents\"]\n",
    "\n",
    "    if not filtered_documents:\n",
    "        # All documents have been filtered by grade_documents;\n",
    "        # re-phrase the query and retrieve again.\n",
    "        print(\n",
    "            \"---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---\"\n",
    "        )\n",
    "        return \"transform_query\"\n",
    "    else:\n",
    "        # We have relevant documents, so generate answer\n",
    "        print(\"---DECISION: GENERATE---\")\n",
    "        return \"generate\"\n",
    "\n",
    "\n",
    "def grade_generation_v_documents_and_question(state):\n",
    "    \"\"\"\n",
    "    Determines whether the generation is grounded in the document and answers question.\n",
    "\n",
    "    Args:\n",
    "        state (dict): The current graph state\n",
    "\n",
    "    Returns:\n",
    "        str: Decision for next node to call\n",
    "    \"\"\"\n",
    "\n",
    "    print(\"---CHECK HALLUCINATIONS---\")\n",
    "    question = state[\"question\"]\n",
    "    documents = state[\"documents\"]\n",
    "    generation = state[\"generation\"]\n",
    "\n",
    "    # First gate: is the answer grounded in the retrieved documents?\n",
    "    score = hallucination_grader.invoke(\n",
    "        {\"documents\": documents, \"generation\": generation}\n",
    "    )\n",
    "    grade = score[\"score\"]\n",
    "\n",
    "    # Check hallucination\n",
    "    if grade == \"yes\":\n",
    "        print(\"---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\")\n",
    "        # Second gate: does the grounded answer actually address the question?\n",
    "        print(\"---GRADE GENERATION vs QUESTION---\")\n",
    "        score = answer_grader.invoke({\"question\": question, \"generation\": generation})\n",
    "        grade = score[\"score\"]\n",
    "        if grade == \"yes\":\n",
    "            print(\"---DECISION: GENERATION ADDRESSES QUESTION---\")\n",
    "            # \"useful\" routes to END in the graph wiring\n",
    "            return \"useful\"\n",
    "        else:\n",
    "            print(\"---DECISION: GENERATION DOES NOT ADDRESS QUESTION---\")\n",
    "            # \"not useful\" routes back to transform_query\n",
    "            return \"not useful\"\n",
    "    else:\n",
    "        print(\"---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\")\n",
    "        # \"not supported\" re-runs generate with the same documents\n",
    "        return \"not supported\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langgraph.graph import END, StateGraph\n",
    "\n",
    "workflow = StateGraph(GraphState)\n",
    "\n",
    "# Define the nodes\n",
    "workflow.add_node(\"web_search\", web_search)  # web search\n",
    "workflow.add_node(\"retrieve\", retrieve)  # retrieve\n",
    "workflow.add_node(\"grade_documents\", grade_documents)  # grade documents\n",
    "workflow.add_node(\"generate\", generate)  # generate answer\n",
    "workflow.add_node(\"transform_query\", transform_query)  # transform_query\n",
    "\n",
    "# Build graph: route_question picks the entry node per question\n",
    "workflow.set_conditional_entry_point(\n",
    "    route_question,\n",
    "    {\n",
    "        \"web_search\": \"web_search\",\n",
    "        \"vectorstore\": \"retrieve\",\n",
    "    },\n",
    ")\n",
    "workflow.add_edge(\"web_search\", \"generate\")\n",
    "workflow.add_edge(\"retrieve\", \"grade_documents\")\n",
    "workflow.add_conditional_edges(\n",
    "    \"grade_documents\",\n",
    "    decide_to_generate,\n",
    "    {\n",
    "        \"transform_query\": \"transform_query\",\n",
    "        \"generate\": \"generate\",\n",
    "    },\n",
    ")\n",
    "workflow.add_edge(\"transform_query\", \"retrieve\")\n",
    "# NOTE(review): \"not supported\" loops generate -> generate with no retry cap;\n",
    "# a persistently ungrounded answer cycles until the graph recursion limit.\n",
    "workflow.add_conditional_edges(\n",
    "    \"generate\",\n",
    "    grade_generation_v_documents_and_question,\n",
    "    {\n",
    "        \"not supported\": \"generate\",\n",
    "        \"useful\": END,\n",
    "        \"not useful\": \"transform_query\",\n",
    "    },\n",
    ")\n",
    "\n",
    "# Compile\n",
    "app = workflow.compile()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/jpeg": "/9j/4AAQSkZJRgABAQAAAQABAAD/4gHYSUNDX1BST0ZJTEUAAQEAAAHIAAAAAAQwAABtbnRyUkdCIFhZWiAH4AABAAEAAAAAAABhY3NwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAA9tYAAQAAAADTLQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAlkZXNjAAAA8AAAACRyWFlaAAABFAAAABRnWFlaAAABKAAAABRiWFlaAAABPAAAABR3dHB0AAABUAAAABRyVFJDAAABZAAAAChnVFJDAAABZAAAAChiVFJDAAABZAAAAChjcHJ0AAABjAAAADxtbHVjAAAAAAAAAAEAAAAMZW5VUwAAAAgAAAAcAHMAUgBHAEJYWVogAAAAAAAAb6IAADj1AAADkFhZWiAAAAAAAABimQAAt4UAABjaWFlaIAAAAAAAACSgAAAPhAAAts9YWVogAAAAAAAA9tYAAQAAAADTLXBhcmEAAAAAAAQAAAACZmYAAPKnAAANWQAAE9AAAApbAAAAAAAAAABtbHVjAAAAAAAAAAEAAAAMZW5VUwAAACAAAAAcAEcAbwBvAGcAbABlACAASQBuAGMALgAgADIAMAAxADb/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/2wBDAQMEBAUEBQkFBQkUDQsNFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBT/wAARCAHpAfwDASIAAhEBAxEB/8QAHQABAAIDAQEBAQAAAAAAAAAAAAYHBAUIAwIBCf/EAFgQAAEEAgEDAQMIBAkHBwoGAwEAAgMEBQYRBxIhEyIxQQgUFRYXUVWTMmHR0iMzUlNxgZGSlEJUVnSVtOEJNjhicnOyGCU1RnV2gqGisyQ3V2OxwSbC8f/EABsBAQACAwEBAAAAAAAAAAAAAAABAwIEBQYH/8QAOhEBAAECAQkECAUFAAMAAAAAAAECAxEEEhMUITFRUpFBU9LwFWFxkqGxweEFIjKB0TM0YqKyY3Lx/9oADAMBAAIRAxEAPwD+qaIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgLBt5zG4+b0rWQq1peOeyaZrHcffwSs5VNnMbUvdRNgdZqwWC2CoAZYw7j2X+7kLGuum1bru17qY7PbEfVsWLWmrzMcFi/WrC/jFD/Es/an1qwv4xQ/xLP2qu/q9i/w2n+Qz9ifV7F/htP8hn7Fy/SuT8lXWHR9Hf5fBYn1qwv4xQ/xLP2p9asL+MUP8Sz9qrv6vYv8Np/kM/Yn1exf4bT/ACGfsT0rk/JV1g9Hf5fBYn1qwv4xQ/xLP2p9asL+MUP8Sz9qrv6vYv8ADaf5DP2J9XsX+G0/yGfsT0rk/JV1g9Hf5fBYn1qwv4xQ/wASz9qfWrC/jFD/ABLP2qu/q9i/w2n+Qz9ifV7F/htP8hn7E9K5PyVdYPR3+XwWJ9asL+MUP8Sz9q9aufxl6dsNbJVLEzue2OKdrnHgcngAqtvq9i/w2n+Qz9i8KuLpUd3059apBXeb8zS6KJrSR8ys+OQFsZPl9jKLkWqaZiZx4dkTKu5kOjomrO3LfREW85QiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgKr8l/+Yew/9zU/8L1aCq/Jf/mHsP8A3NT/AML1q5Z/aXfZH/VLoZD/AFoZCIi8S9Giu8dT9Z6cGi3YMkact4vFaCKtLYll7AC8iOJrndrQRy7jgcjk+VFrXX/DVuq+N075tclgv4qPIQ5CCjZla58krGxM4ZEQ1ha
7uMhIa0+HEFa35Q1WNjsJlKNDa27TRjtOxWY1egbnzZ5azmGxHwQ6OUhvhzePYPtN8E6iHI7Pr3UzStv2bWsnZlyOoDF5AYOm+2Kd8zRSua9rOSxn6Q7vIBHHPxW5RbommJnfOPb2tWquqKsI9Sw7PW3Sqe4t1exmvQzLrLabY5KszYjO4cti9Ys9LvPI4b3cnn3JL1r09mxX8CzJWLOXoTGvbqVMdZndA8RiT2+yMgAtI4cfBPIBJBA596n0dt2GXYRlcVu2UzlHZYbVGtj4ZvoiLGQ245I5GNYQyeQxNJI9uXvPhoAV19H8Hbxe49VbdvHz1Bf2MS15p4XMFiIU64DmEj2mhweORyOQ4fepqtW6ac76+z1espuV1VYee1ldEes9HrPrH0lXp2sfaY94mrzVZ2Rsb6sjGdsskbGyEtj5PZz2k8HgqxlT3ybJ72A08aZlsHl8ZlMNPc9WxapPZUna63I9joZv0ZOWvafB8efuVwqi9EU3JincttzM0RM7xYX/AK6ab/7Qm/3Kys1YX/rppv8A7Qm/3Kyt78M/uqfZV/zKvKP6NXsWqiIvWPKiIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICLFtZOnSE5sWoYBBEZ5fUkDfTjHve7n3NHB8+5V5m/lFabjunbt1xdi3t2F+efMI/q5Wdclln5I7GNHHPke/njyPPlBZqKCZHeNoG9azi8Xo9nIa1ka3znIZ+a7HX+juQ7tjMDh3vfyGcgHx3H7liYzE9TspY32tm81g8Rj7jZK+sWsLXkktUWn1A2ew2X2HyDmJ3aD28sPu58BYy1cu04WDPw4OTL0I83NGZYsa6ywWZGAElzY+e4gAHyB8FXWR+T3S3LppgtR3nZM3tjsbbNybJ/OnU57ryZCGyekR7AEnAaD/kM8+FO5ND1ybcI9rkwdCTZo64qx5Z9drrMcXtew2Qjlo9t3PHHPPlBE6PX3W9k17bsnqcOT26fWZXVrmOxlGRtiScEgxRNlDA8gg88EjwV85bceoeUwWn5HWNMrwSZGZrsxS2C2IJ8dByOeAzkOfx3eAfB4VltaGDhoDR9wC/UELqYDdWdUruWsbVVk0h1QRVtdbjmiVk/DOZXWOe73tfw3jjh/6gq40bQT03z+xYl2w5zZ3uMFh2Q2G586skua72O/gew3jgD4BX2ormunWPzWYnybrmRqWZ2MZJ80sdjXBvPb44/WVhdtxes12pnDOiPnE/RtZNdizcz6lb7L0r03c8iL+e1XD5m8IxELN6lHNIGAkhvc4E8Dk+P1rVH5P/TM8c6Brfj3f+a4f3VaP2VUfxjN/wCN/wCCfZVR/GM3/jf+C5EfhdcRhF75unOW2J2zT8IRLVtI17R601fXsJQwkE7xJLFj6zIWvdxxyQ0Dk8LdrZfZVR/GM3/jf+CfZVR/GM3/AI3/AILCfwiZnGbsdJZRl9qNkRLWotl9lVH8Yzf+N/4J9lVH8Yzf+N/4KPQ//ljpKfSFrhKL7NqWE3THNoZ/E0szRbIJRXvwNmjDwCA7tcCOQCfP6yosOgHTMAgaDrgB8HjFw+f/AKVaP2VUfxjN/wCN/wCCrzZdLt9Lmbjt2xbZnszptStHYqYvGVnPvViCRLy5v8Y3y0gkDtHcXEBpJzj8KrpjCL3zYTltmds0vXXOk2lahk25HB6nhsRfa0sFmlRjikDT7x3NAPBW/wD/AF003/2hN/uVlZ+u6Tg9rwGOzWMzucnx2Qrx2q0rrLoy+N7Q5ru1zQ4cgg8EArcYzppjsZl6OR+fZK3PTe6SFtqz3sa50boyeOP5L3D+tbOTZBNi9F2q5jhE9k9sTCu5llqq3NFMYYpciIuk4wiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIvOxYiqwvmnkZDCwdz5JHBrWj7yT7losj1D1nE7Zi9YuZ6hW2LKMMtLF
yTtFidgDiXNZ7y3hj/Pu9khBIUVeY/rJW2P6919fwGayWT1T1InV7FN1WLIWGeoPRrzPHa8l0fHPuHe0+4rByOa6sbNoGv39fwGC1DZ7No/SeL2ay+22pWBeOWSVuA6QgRkA+B3kHy1BaKxcnlKeFoTXshbgoUoG90tmzI2OOMfe5ziAB/SorY03ZJuqVbY27raj1iGmYDqraUfpSSkO5mM36fxaQ34dvv8+NbrfQPU9ew+0YqWO7ncdslg2MjWzdt9tjyST2NDv0Wj3cD7h9yDK23rpo2kxavLlNggEGz2W1cRPUjfaitvcWgdskTXNDfbae4kDj4rIo9Q7l3qnkNQOo52vRp1BZGyzwBuOneQwiKJ/PLncPdz4HBjcPuKkmE17F61iqWMxOPq43H0o/SrVasTY44Wfc1oHAH9C2CCqcVJ1e23ppmI8jHgtB3WS2G4+auTkq8VfmMlz2ngOeR6oA936J+8La5Dpdkdht6Rfyu7Z+K/rzI33IsNM2nTzE4DO51iEB3LC5riIwQB3kc+ArBRBEML0k1HXtzz+2Y/CQwbBnmCPJXS97zZaA0AFrnFoHsjwAP/mVJMXiKODpsqY6lXx9Rn6MFWJsUbf6GtAAWWiAiIgIiICIiAiIgIiICIiAiIgIiIITldIyLOpFLca20ZmLHVMbJUs6vD2Pp3D5dG8NI5bICT5B5dw0choIdk9LuokHVHT6+ehxGWwJfJJBLj81UdXswSxuLHtLT4IDgRyCR/WCBLVCuonS6r1DyOr35MzmMLc17ItyFaTFWzC2U8dr4pW+WvY5vLTyOQHOAIDnAhNUUJ0fqBk9lz214zMalktXbhbza1W7ecx1fJwv59OWF4PBJ48t89pc0E9xLRNkBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBQLdsluD951rBYfFtdquVq32ZjOxv7Z8a9sX/4cx+0PLnkj9F3HA9ynqr/AGzFXrXV7Q70O6Mw9KrDfFjWTL2uzBdEA1wb3Du9E+1+i7jn4INOz5N2t5jpjW0jc8hmeoOOiu/SD7Ow33vsSTckjmSPsPYO48N5448HkKw2aphWZCnkPomk7IU4BWr3XwNdPFEPcxshHcG+T45+JW1RAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREEW6m9NsJ1b0vIavsMU0mMuhpc6vKYpY3tcHMkY4e5zXAOHPI5HkEeFrOinUGj1I0SDI46lkqFWpPLjWx5d3fYcYHemXucXEu7u3nuJ5PPJ8qeKF9JrW7XNUfJv9OlRz3zyw1sVAgx/NxIfRPhzvJZwT59/wAAgmiIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICItdnM/R1yl86vzekxzuyNjWl8kr+CQ1jByXO4BPAHuBPuBUxE1ThCYjHZDYoq8sdQc7bcTQwdarD47XZK0fUI/WyNrgP7y8Prnt3+bYT+9MrtFxqjr/DZjJb07c1ZSKtfrnt3+bYT+9Mn1z27/NsJ/emTRRzR1Tql7gnGza9S27W8rgslGZsdk6ktKzGDwXRSMLHjn4ctcV/Cfqv0oy/SnqpmtGuRPs5Gjb+bwmNhJssdwYXtaOT7bXNcB7/a496/tB9c9u/zbCf3plUe9dGD1A61aj1LyVTE/TGvM4bXZ6npWnNJdA6TxzzG4lwI957efA4TRRzR1NUvcFl/JX6Kx9BeimC1lzGjKvb89yj2kHvtyAGQcj3hoDYwfiIwVbarX657d/m2E/vTJ9c9u/zbCf3pk0Uc0dTVL3BZSKtfrnt3+bYT+9Mn1z27/NsJ/emTRRzR1NUvcFlIq4j3vZ657p8TjLkY97a9uSJ/9Qcwg/1kf0qVa5uNHZHyQRtmqX4h3S0rTOyRo547h8Ht93tNJHnjnnwsZtVRGMYTHqlXXZuW4xqhvURFUoEREBERAREQEREBERAREQEREBERAREQEREBERAVbdAMVRw
2hSV8fuj99rnI23/S0kvqFrjKSYee538WfZ45+HuCslVd8nLK6vmOnctjUNetaxiRk7jDRt895mEzhJJ5c7w53JHn4+4ILRREQEREBERAREQEREBERAREQEREBERAREQEREBERB5WrUVKtNYneIoIWGSR7vc1oHJJ/qVVVrU2w2znLzS2edvFaF55FWA+Wsb9znANc8/F3A5Ia0CXdVZHM6eZwN9z4PTf/wBhzg1/P6u0lR0DgK2fyWsY7ZmOmHzx+Dq5DREzNciKoOrfVW90h3bH5DIWDNql/DXWRUhG0H6SgAmjAfx3Eyx97A3njlg4HJPMGxvX3aXUMTRyQbBser0Mpk9yqQRxkvbTi4ijaS0hondLFIC0A8NIHA5Wrg6U3aYnCXTCxMVl6OdoR3cbdr5ClIXBlmrK2WN/BLXcOaSDwQQf1ghc/wDTfc+rWZyuqZW1QzF/EZd0cmShuUcbXo1a8sfcJK0kVh057SWcCQOLmk89p4CifTjNbh006JatuVXY23tdZlTVt63NQiDfQmyL4C6KYfwnqBz+7yS0+7gcJgx03qnD/wCOt0RURj+oWyYPqzmqm4561h6jLFqTD4P6LiNPKUo4S5hht8d3rjjucwu58cBvHkFtVUU4Yr3RcydPuo/Vrb/qvs0GMy9vGZixBNZx8tLHR42ClK4dzopm2DZLmMPcC9p7i0gsbzwPW31g2al1ExF3F5zKbDqN/Z24KUzYatXxrBJI6LtgnDhPI+Nw47+Cxxa73e5MFemjDHCXSy8rVqCjVms2Zo69aFhklmlcGsY0DkucT4AAHJJVF4TddsdL1R2nJbHK/X9OyuQZXwlanA02YYKrZfTklLC7gFw7S3h3PdyXAgD0ix+75TpDltn2Hcm34cnrVm3Lg6+NhjrQGWs57GxSgeoe3njlznd3HwROkx3Qu+ncr5GnBbqTx2qs8bZYp4Xh7JGOHLXNcPBBBBBHvXxdpGyYZYZXVbtd3qV7Mf6UT/8A+wfcWnwQSCuf+lOY23TZujuPyGxszeE2nEGAY91COAUHRURPGY3t9tw7WFru8nknkce4dErKmqaJxpZUzFynbCa6jnzsuArXXxtgs+YrMLXdwimYS2RoPxAcDwfiOD8VuVA+lz3C1tUQ/imZJjhwPc51aEuH/wDB/wDiU8WzdpimvZunCesYvN3acyuaY7BERUqhERAREQEREBERAREQEREBERAREQEREBERAUL6TWt2uao+Tf6dKjnvnlhrYqBBj+biQ+ifDneSzgnz7/gFNFW3QDFUcNoUlfH7o/fa5yNt/wBLSS+oWuMpJh57nfxZ9njn4e4ILJREQEREBERAREQEREBERAREQEREBERAREQEREBERBgZ3ERZ/C38bOS2K3A+Bzm+9oc0jkfrHPIVZ4qxO+B9e60R5Go70LUfPPbIAPP9DgQ4fe1wKttRvadOZnZWXak4x+WjZ6bbHZ3MlZ5IZK3kd7QSSPILSTwQHODraZiqnMqnDh59bcya/oatu6VabloeH32HExZmt85jxmRgylYePZniJLCfHkeSCPiCvKDpxgINq2LYPmLJMhn6kFLIeoA5k0cQe0Ajj4tf2n7w1v3LfWK+wY1xZb12xY44HrY2WOaN33kBzmvH9bV4fSF//RzNf4T/AIrHV7nZGP7w7MXbNW3GEL0XotS6fXqjsdsuyz4qk10dPCXMgJKVZhBAa1vYHua0Hhoe53b449y1GB+TXr+DkxMLs3sOSw2LtC9Vwl6611JtgPLxKWNY1ziHkvDS7tB8hqsv6Qv/AOjma/wn/FPpC/8A6OZr/Cf8VOr3eCM6zxhEn5/qUHuDdL1xzefBOzzAkf0fMViwdGql/a6GyZrMZvIz1rD8hBhbV8T4+lZexzXGIem15DQ94aHHgA+GhTf6Qv8A+jma/wAJ/wAVpcl1Cp4jZcPr1zHZSvmsw2Z9Ck+r/CWBE0OlLfP+SCCU1e7ypz7c76sejQ6f0Nxui5WtLh9h2Ot
hqksk1bXfn4OPgLw7lrWdneWDuJDHPLQeCB4C1j/k2YEvrsjzuxQY+nlBmMfjI7rBWoWRN63dG30+SC4u9mQvAD3cAe8WX9IX/wDRzNf4T/in0hf/ANHM1/hP+Kavd4IzrOGGMNRrvTzFa5Ds0MfrXINhyE2Ruw2y17C+WNkb2NAaPY7WDweT5PlRXCdAMdgcfPi4No2mXAupz0IMPYyLZK1WGSMx9rAY+49rT7Ie54bwPHhWD9IX/wDRzNf4T/in0hf/ANHM1/hP+Kavd4Jz7PGEcg6V4mvLokjbF0u06J0NDl7OJQ6sax9X2fJ7Dz7Pb5/V4Utt2oqNaSed4jijHc5x+C8I5M1bPZV1jJuefc6x6UDB/SXP5/sB/oUm1/RZ/nkOQz0kFiaF/qV6UAJhgcPc5zj5kePgeGgfBvIDkizNO25OEe2MfPtV15RatR+WcZZ3T3DT4nXhLcjdDevyuu2InnkxOfx2xn9bWBjTx45aVJkRRXVn1TVLgVVTVMzIiIsGIiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgKrvk5ZXV8x07lsahr1rWMSMncYaNvnvMwmcJJPLneHO5I8/H3BWioX0mtbtc1R8m/06VHPfPLDWxUCDH83Eh9E+HO8lnBPn3/AIJoiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAud+rv8A0x/k/wD+qbB/urF0Qud+rv8A0x/k/wD+qbB/urEHRCIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICrboBiqOG0KSvj90fvtc5G2/6Wkl9QtcZSTDz3O/iz7PHPw9wVkqrvk5ZXV8x07lsahr1rWMSMncYaNvnvMwmcJJPLneHO5I8/H3BBaKIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiIC536u/8ATH+T/wD6psH+6sXRC536u/8ATH+T/wD6psH+6sQdEIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgKF9JrW7XNUfJv9OlRz3zyw1sVAgx/NxIfRPhzvJZwT59/wCmirboBiqOG0KSvj90fvtc5G2/6Wkl9QtcZSTDz3O/iz7PHPw9wQWSiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIi+ZJGxML3uDGDyXOPACD6Rax2z4Zji12WotI94Nlg/wD7X59asL+MUP8AEs/arNHXyynCX1s2Ru4jW8rexuOOYyNWpLPWxwl9I2pWsJZF38Ht7nAN7uDxzzwV/L7cf+UTOz9ZtB3p3T003alFkITjjmu42fnMQj59T5uOzs7ef0Xc+7wv6f8A1qwv4xQ/xLP2r+ZHyj/kow5v5XmKpa/YrxapuVj5/PcryNMVDg83AXfotPve0HgEyNaPcmjr5ZMJd/fJ16u5Lrn0wpblkdYOqR35ZBUpuum06WBp7RKXGOPjucHgDg+Gg8nu8Wao5gb2r6zhMfh8ZfxtTHUK8dWtXZZZ2xxMaGsaPPwAAWf9asL+MUP8Sz9qaOvlkwltEWrG04UngZehz/rLP2rPrWobkfqQTRzx+7vjcHD+0LGaKqd8IeqIixBERAREQEREBF4WL1am6Js9iKB0zwyMSPDS9x9wHPvP6gtVV3nW72fyGCrbBirGbx0frXcbFdjdZrM8HukjDu5g9pvlwA9ofeg3iKKQ9WNHsavPssW5a/JrkEvoS5hmUgNSOTkDsdN39gdy5vgnn2h96klG9WydKvcp2IrdSxG2aGxA8PjlY4ctc1w8EEEEEeCCg90REBERAREQEREBERAREQEREBVd8nLK6vmOnctjUNetaxiRk7jDRt895mEzhJJ5c7w53JHn4+4K0VC+k1rdrmqPk3+nSo5755Ya2KgQY/m4kPonw53ks4J8+/4BBNEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREEc27az
gWRVacTbWWsgmGJ59iNo98shHntH3Dy48AceS2BWsFFl5fXzUr85Y5JBugOjZ+pkQHY0D+jn7yT5XrWtHMZnNZZ5DnzW5KsZ8+zDA90bW+fh3CR/9MhXlsmyYzUMHbzGZuxY7GVGd81mY8NYOeB/SSSAAPJJAHkq6uuqzOZROExvntx7Y9nZ63fyaxTboiqre+hr2KaABjKYA9wFdnj/5L9+r+L/Daf5Df2KJ4LrlpWxVc5aqZgxVsHEybIzXqk1RtZr+7tD/AFWNIcQ0nt9/BaePaHPxjuu+jZTF5m/DmyyDD1TeussU7EE0Vcc8yiJ8Ye5ng+01pCo0lzmlt51HGEv+r+L/AA2n+Q39ifV/F/htP8hv7FH9W6t6numXmxeHyws3Y6/zsRvrywiWDkN9aJz2tbLHyQO9hc3yPPlYmvdcNH2rPQ4bF56OzenL21uYJY4rRYCXCGVzBHNwAT7DneAT8E0lfNJnU8Ur+r+L/Daf5Df2J9X8X+G0/wAhv7FDcb190XMVrtmlmZLFWlFNNZssx9n0oRE/seHP9PtDwSPYJ7iCCAQQV5jrzqeQ0zZtgw1yXKDA1zNapfNJ4bDD2lzA6J8YkaHceHdhHAJ9wKaSvmlGfRxhNhgMWD/6NqfkN/YvAarjIZxYp1hjLY91rHn5vKP6XM47h+p3I9/IPJUP1brprea6Z0tyyNiXDVHxwNsNt1J4iJ5GMd6UQfG10/l/DXRhwd8OVI9L6h691CrWpsDkBc+avEdiGSGSCaBxHID4pGtezkeRyBz8FlF65TuqnqY0VbOKe6nttoXIsRmZPXsSA/Nr4YGCxwOSx4HhsnHJ8ABwBIA4IE0VQ5+pJbxU4ruDLkQ9etIf8iZh7o3f1OA/pHIX7p3RfDZrp5naWZz2ybfh9zYy7YgzuTdL6EcgDhDA5oa6OMAgcA8+PerKsK6dJHsnz6/o4mV2YtVRNO6Vg7BvOuamKBzeexuJ+fzCvTF23HEbMhIAZGHEd7uSPDeT5Wtj6sanL1Ik0FmZidt8dX56/GCN/e2Hhp7i7t7fc4Hjnn9XgrzpdHNIo69r2D+rGOt4vXuPoqC/CLfzMj3OY6XucHD+Vzz+tStlKvHakssgibZkAa+ZrAHuA9wJ95VTRVnjOuVja9A2LYdY0XZL93FWfmsGIydUY+XIO5YC+FzyQWAOJ7j/ACD49yzsjsfUi5R0e1iNSxlN990cmx08rkeZcZGfTL2ROjHbK8AygHwOWt8efFioghWOxO9N6oZe9fzuMk0N9RsWPxEFQttxzcR90kkxPBHIl4aPg9vxCjmL6IWLHTHKaftm/wCxbS/I3G2pMx84FO3E0GMiKJ0f6DOYzyP/ANx4+Pi2FQD+v+hDOPxRzwFtmRfipT80n9GG22Qx+jJL6fpxuLxwA5w7uQW8ggkLCv8ARTScvd0y9k8WMnkdQjbHhrtuzI+WDtEfDnHu/hHfwTDy8E8g/eed/V0rWaOfyGdrYHE183kY/Su5KKnE2zZZ4HbJIG9zx7LfDiR7I+5VJtHXPRtMz8mGzGfjqX4RGbDRBLJHVEn6HrysYWQ93w9RzeQefctbd664zH9bPs+mp3PUOPgstuw0rMrXTSylrYyWRFrWBvBMpd2cktJBaUFvQ9K9Fr6vPrUWn69Hrk8vry4dmMgFOSTkHvdD29hdy1vkjn2R9yj2c0x2obpBvdHYMtV1zDYCWlPpuMriarYZH7URhhaPZe0dwAYOXcMaCACHYW6bzg+nuGGU2C+KFN0zK7HCN8r5JXnhrGRsDnPcfPDWgnwfuWFh+qOs56xgIKWRdLPnYrM2PjfWmjdK2u5rZ+Q5g7C1z2gh/B8+AeCgsjRN5xXUXUMVsmHklOOyMImiFmIxSt8kFrmO8gggg/Dx4JHBW+9Rn8pv9qojLdcdKwkL5bmYexrMtJgu2OlYkcbzGF7oWtbGS49o8EAgnwCT4XwOvGi/VK5spzrY8RStMo2nyVZ2S1p3ua1sckJYJGE
l7f0mjwefd5QX16jP5Tf7U9Rn8pv9qo/A9ZdO2OvmZquaZAMNF6+QZkIJaUlaLgn1HsmaxwZwD7XHB4968dZ646Tt8WSfi80ZTjqhv2I56k8EgrAE+s1kjGuezx+kwEe7z5CC9vUZ/Kb/AGp6jP5Tf7Vy5uXyptTw3T23tGBNrYoYn1GRmLH22QSevIGgiX0S32QHkgeQ5vYe1zgFMcp1n1HC6/i8zfyFipVybnspwS46yLc5YSHdtX0/W8cck9ngEH3EEheXqM/lN/tX6HB3uIP9Coyz1p0mppcO2SbBX+gppvm0dhjXve+bkt9ERBpkMnII9Pt7hwfHhZXydurg6q5rqCKphkw+HyNenRmZXlhlew1mSP8AVbJwQ4Pc9vHa3gAAjnygupERB8+o0f5Q/tT1Gfym/wBqpvI9WNUrYPYNglyvZisNfmoX5jXl7obDJRG6P0+3vc7vc0DtB7u4EcghavPddtF1jPS4fJ5+OtdhcyOwfm8zoazn8drZpmsMcRPcDw9zTwR96C+PUZ/Kb/anqM/lN/tVF7f1t0rRcyMTmM0IciImzyV69Way6GM+58vpMd6TT8C/gLU4rrti8j1izWhPp3YpqMNV0NttGy9k0kolc4OcIuyNrQxvD3O7XFxAPLSEHRXqM/lN/tVddCMLU1zRpKlbcpd3iOQtS/Sk8hc4F0pJh5LneGfo+/4e4KGY7rpouW2puu1M/FLk32H1I/4CUQSzs57omTlnpPeODy1ryfB8Lyh6+6HYt5SvHnC44t9mO/N8zsCCo6uXCZssvp9jCOx3Ac4d3jt7uRyF++oz+U3+1PUZ/Kb/AGrmPdPlM6zguml/bsKLWbjrW69L0HUbUDu+V7QC4Oi7g3sJcCRw7gNB5c3mT5brZp+CwWKy2QyNmpBlXPbSryY20Lk5YSH9tX0/W4bxySWcAEH3EEhevqM/lN/tT1Gfym/2qib3XLRsdgMHm5tghOLzcroMfYiiklFiRrXOMYDWkh/sOHaQD3Dt47iAvjNdddK16rjZshlLFd2QgNqCt9G2nWRCDwZHwCIyRs5BHc9rR4QX0CCOQeR+pfq0OjZqhseq4/KYu5Dfx1uP1oLNd4eyRhPIIIW+QEREBERAREQEREBERAREQEREBERAREQEREFQYWuce7J0HgiSpkbLCCOPZdK6Rh/rZIw/1qCfKOwtrNdJ8j9H17drJU7NS9Uhp1XWnOmisRvZ3RN9p7ORy4N5d2gkAkAG4911uxFfdnsZAbMro2xXakf6csbeS2Rg+L29xBHvc3ge9rQYZmsNguo2t2sZfjiyuJskMmibI5vtNcDwS0hzXNc0fEEEKy9E1VTdjdPz7ft6nobNyL1rNjfhg5ay2JyPVPEb5cjFx++WZsNkresx0beIe+jUmJa2F04Y9znn1SJBxw5rQOOATIs7qOJ2np71ByOD1jqAdgbrNrH1n7S+9NLMJmlzoII55Huc7ujYT2t4JI4J8q9NI6Wax06kty4HGGrYthrZ7M9iWzPI1vPa0yyuc/tHJ4HPA5UrWvizizs/NvUL1L0bNbPsmm1MbWs1nTahm8ZJfETxHVmmgrNiEjwOGnuDiAfPsnj3LSdHtQxN2TTsXm9V6g089gmxyv8Apa9dkxNOzBGWh8bnzGF7T5DBGD4dxwByulUUM9FGdnOetVxm4al8lOaDA429j9pbatyeg2rxcbG/JSGR8cbx7UnoOc5gI8+zxz4Wo03U72R3bfhjsXtzMdnNQFKpkNsE5ksWWOma4OdMe6P+OZwxwbzw8tb2jldOImKNFGzbucrZbH5ja+lvTB/1Y22A6VPUjzWKginoXZAKjoHy1Htc10pjceR6bvaDj95CtvoxgMFDNm89isRtWOt3XRVZ7G2zWn2LLIgXMLW2JHPaxpleByG+efHHBVnL5llZDG6SR7Y42Auc9x4AA95JU70024pnFjZa+3F4u5ccCWwQvl4aOSeATwB8T+pQ7Zfle9J+gGsY/Xs1s8e
Tz2IpxUp8VhWfO52yxNEb43Fv8Gx7XNPLXvaRwVPtbwx3K1WuvZ/5ggkbOx7uQbkjSHMLR8YgQHd3+UQOPZ8nn75WX/J74nqvPktu0J0WD3KxI6zbpTPIqZKRxJe4nz6Uriee4ey4/pAFxetqY0dGZO+Zxn6fvvcnLbsV1RTT2N58n75eDvlFdY/qfgNAsVcMytNdnzFvKRiWvAwcB7q4Zw7ukfFH2tkJHf3eQDx1iv5gfIN+TTsjupfUdmay2e6fbFruPZjfQpt9KYutiUCUuJ7XsYIO5oALXOMbw72B3deu6OdaNV1qarrPWT6Zyk1hrzZ2nFxyCKMNeC2Ps5PJLmnzz+gB7uVS5zoJFQlrN/KJ129hqEet6jt1URs+kMsy6+m57+493ZET7PDS3jwee0/eF6s+UZtuPyOXOc6LbZj8LjopJDfqOiuST9ruGiOFh5d3Ag+/wOefcgvZcS5/T87L0I6q0I8JkH5C3utu5VqtqSGWeI5OKRssbeOXNLW9wcPHA554Vv0/lsdNYcE3K7K7OaLBJO6vBHsmImhlsFrQ4ljWB/LQHN8/rCmWH6pdMM+zHup7jg3yZFjJKteXJMinla8csIie4P8AIIIHHnlBzfsdbNahjusmpu03N7DkNxu2rOJu0aRmqTss1Y4Wsmm/Rh9JzXA+oR7IBbzyt/j8blul/VzV7eRxGWzVGbTaevPyOJpvtNZbhnLnGXtHLGkP5D3ADwV1OMHQc5zQ3lzf0gHnkf0r9+r9L+bd/eKCjuu2MxeU06qMpjdhviDIQ2K1jV4HTXqE7Q4sssa3k+z5B9l36fBaQSqrw+Q3eHJdLt22/B5nIDGjNY+1JUxbnXvRmdF80nmqxAuY57YPaDR7JcOQOfHYv1fpfzbv7xUL6nvylDU85BoVHHZzea9eOSrirl0RBokf2tlkHIPaOHuAJb3dhAPKDkN2RyGuZHA53Ka1nKQs9Ur9+HHOoudckgfQlDXsjH6fjk+yT7iByQvTqMMldxfUDqBY1/K4fF5bO6zDRx16sYbtkVbTA+Uwk8tLy8NaHcEhg5A8LsuTppi9jg1q5slGGxm8Q9l2N9SaVsMNv0nRvcwdwLm8SSAB/PgjxyFl7P0013c8YzH5ii65TZYhtNj9eRnEsUjZI3ctcD4exp454PHB5CDjrqjquxdecvtubwesZbF0qusx4uGLOVTRmys7b8Nt0TY3+ezsgczucACZjx45KkOySZbq3ugzuL1TPYTH4XVMtTnfl8c+rNbsWWRiOtHG72pO0xF3c0FvJABPK66+r9L+bd/eKfV+l/Nu/vFByrs+mZi38jfC4Khh7L8zUwuHldimxdk5fA6vLLH2Hg+pxG8dp893j3rTdRYp9p6h6vv8uv707VjirOInrYdlyhlKMxmZI2V8ETmTOieGlpABHLWkjwF2H9X6X827+8U+r9L+bd/eKDjm5qEOnWOnu8YDT9pt4Wpl8jey2Kuia5lhLYgEDLhike97iPSB7ee4Nk57eeQLZ+TC67kNw6t5qxhcrhamVzFSem3LU31pJY20omF4a74dzT/R7jwQQLt+r9L+bd/eKyaePgo9/otLe/jnk8+7/wD6gyUREHJe19KMxb+UJ6UFfnQstbh2bJng9oyFMGJkXHu/hHOqynn3/NnfrUCo9PYMVmtz1nctb6g5f6ZztuzDNgL136LvVLUncDKIpmwxuaHFr2vA5DfHdyu4X4KnI9zjGeXHk+0V8/V+l/Nu/vFByprl250P37fYbmn7FnKmcuQXsXksJQdd9WJtaOEVpHA/wbozGQDIQ0h3PK27Zb+rfKC2G1Ywmamxu14rGVaeTx9J88VaWJ87XidzefS4EzXdzuBwD58LpT6v0v5t394p9X6X827+8UHEWBwWw2OmvT7pYNPzNHPYDN05r2WlplmPhirWfVfaisn2JDI0eGtPcTIQQPKnuhanFW6R9S6Ox6zkbtPIZ/OTzYuGs5tm9BJYf2mJp7S4vZ29jgR
z4IPuXUH1fpfzbv7xUQ6W1qOa1h9gbXX3YC3PH9KVI2QsHa8j0e2Mkcs/RJ9548oOV7eO3jbOi/ULCRY/Ysvh6E+Nm176w0vm2VsxxTRTWIXNIa6Ts9Phj3NDnEkcu45Uwy2yWYuqmu9S26ps9zASYO3hZKrcPMb1GczxytkdW49TseGOb3NB9w54BXU/1fpfzbv7xT6v0v5t394oOMNT0PYBm9Cy1/XrlOC/veW2F1GSAudja01af0fX7eWxuLu13k+HSAe/wtj1A1afX+uWw7BmcXu2TwWcx1OOlb0u3ca6vLAJGvhnjrSNdw7vD2ucC0Eu8jkrr76v0v5t394p9X6X827+8UEZ6Ka1jNS6YYLGYfHXMRj2ROmjo5CV0liAyvdK9sjnOcS4Oe7n2j58A8KcLyrVo6kIijHDB7gTyvVAREQEREBERAREQEREBERAREQEREBERARFoc/veu6tlsPi8vm6OOyWYn+bY+pYna2W1J/JY0+T93Pu5IHvI5Dek8Dk+Aqs2q9gcx1Yo6i/V85JlbdB16bY8dC+CrVjHc1glsNc3uc4sIDfa+HPheNjVtk614DdtY6i4Nuta3NfFfGnC5d5tXakb+S+VzQA1snaPZ9/a9zSAQHGz8RiquCxVLG0ozDSpwMrwRl7nlsbGhrR3OJJ4AHkklZ011UTjTOCYmaZxhWei9Eshhdeir7NumV2HL97nyXICKsYB9zGsHceAPiSSTz7vAEg+yih+L5r/G/8FN0VmnucVumuc0oR9lFD8XzX+N/4J9lFD8XzX+N/4Kbop09zj8jTXOaUI+yih+L5r/G/8E+yih+L5r/G/wDBTdE09zj8jTXOaVf5PpBBax1mGpsWbo2pI3NitCyJDE8jw7tc3h3B88H3qEVMTU6V2tDwm4Y3N9QMzl53VZNjhpvmo1rAcDG6WEyObCCD4eGnj0ySR8L3RRp7nMibtc7Jql+Ahw5BB+Hhfqqi903udJsNvWd6YYz6W2fO2m5J2HzGUlFOSfu5lMfcSI3PDnn3gF3byQ0ACV4nqNipMtiNczN3H4fdruOjyD9dN1skzAQe8MPA9QNc147gPPaTwAqFSWIiICIiDGvY6pk4mx3KsNuNrg4MnjDwCPceD8VDsl0J6dZjZYdgu6RgrObh9P070lCMytMfb6ZB497exoB94A4HhTpEFA0PkUdPtYjy9nUZ89qGfyEXoDPY3Lzut1o/UjkLIzI5zQCY+CSC7te8c+fHjP8AJh26jq1HXcJ1x3WrjpbXr5izfmZayFiM+l/B1bR4dUDfTeWhod5ld3dw8LoREFAP6MdRMhuuIqZHeMpDoOs145cTLiczNFmcrOwQcsyr3M9KdjjHKfZAPt+T5K+dWyE+g4Hbuq2xdG8tid7yVqGtkMVr9lubvW4GCNjJIi0ta1nBLiwcfocnk8cdAoggGR65alg81puGzFuzh81tsTZMXj7VOX1XOIafTk7GubG8d4BDiByD58KQY7ftay+wZTBUs/jbWaxbe69jorTHWKzfHmRnPc0eR5I48hb17Gv47mh3B5HI54P3rTu0nXzcydwYShHdycJr3bcVZjJrMZHBa+QAOcP6Sg2ta1DdrsnrzRzwPHLJInBzXD7wR4K9VVlv5NunRdNHaLr/ANKaZhPnnz5r9eyEkFhk3PJLZHFxAJ+Hu8BbjJ6Fscu56xlMZvV/HYLFwfN72Ekqx2BkwAQHyTP9pr/0eXDyeP1oJ2ir7Fx9Tcdlt1sZKXXsviux8utUqjZYbPfw8tisvd7HB/gx3NHxcfh51eU6m73q/TDH5/K9M7eS2WS0YLeva9fjtPgi5fxM15DQ4ENb7I8jvA58FBaqKE5Pq1icP1Lwmi26eUZmcvVNuvNHSe+oABIXMdOPZa8CJ54P/V+8LJ17qxp+12tlrYrYqNubWpnQZhok7fmL2l4d6pdwGgGKTz7vYPnwglqLHo362UqR2qdmK3WkHLJoHh7Hj3chw8FZCAiIgIiICq7
5OWV1fMdO5bGoa9a1jEjJ3GGjb57zMJnCSTy53hzuSPPx9wVoqF9JrW7XNUfJv9OlRz3zyw1sVAgx/NxIfRPhzvJZwT59/wAAgmiIiAiIgIiICIiAiIgIiICIiAiIgIiICIox9Z7X83D/AHT+1BJ0UY+s9r+bh/un9qfWe1/Nw/3T+1BJ0VaVutmJu7nZ1StaZZzlZodYghqzPjg5aHBskoHpscWkENc4OII8LW9VdfqdRcTj2ZrI5LG4/E3GZSRmJuSVhY9LlwZKWHucwEB/DSD3MaQUG1udSX7ln920bU/n+N2jDUQW5q9i3ux0NmRnMbO53AkcA5jy0eC13I54IGz1Xpy2vidWm3CanuW34OB8cexWcfHFKHv473xtHPpkhrQe0+e39ZW20XbqW+6rjtgxs7bONyELbFadsbmCSJzQ5ru1wDhyD7iAVv0BERAREQEREBERAREQFqLuo4XIbBRz0+JpTZ2hG+KpkpK7XTwNeCHNa/juAIJ5APxW3Ubm2SzHK9oZFw1xA5B/agr9m35v5PXT8W+pecyG9h+X+bx5XD4Q+pWrSfxbrEcXPhpBBcBz7TQA4++42u7mgjngjnyOFGfrPa/m4f7p/aoDjsficP1oymw183bbsWXxjfnOCfkHvqvijcxjbIrkntcPDO4cN9p3jkkoLlRRj6z2v5uH+6f2p9Z7X83D/dP7UEnRRj6z2v5uH+6f2rEy++swGKt5LIzVaVCpE6exYmJayKNo5c4nnwAAgmSKv9S6rVd4wzMrhyZqMji1klinPWc7j4hsoa4jz4PHB+BK+8x1SqYDKYXHX5oYLmZnfVoxejI71pGxukc3kchvDGOPLiB449/AQT1F+DyF+oCIiAiIgIiIC0t3SsBkaOYp2MNRfWzERhyLBXa352whwLZSBy8cOcPP8o/et0iCtc18nnS8tpGE1KvUuYPA4W0blGthr0tUxSEvJ9pruSCZXngnj2lubehXrHUyltbNtzcNGCqa0mttkZ9HzHhwEhb293eC7nnn/Jb9ymKIKzxmD6p6/g9zfLs2F2zMTyOm12K7QNKCq0lxEU5iJc8Dlo7h5Pb5PnleeT3nqLrOpapYtaBHsmeuziDMwYPIMjhx4J/jW+r7Ujfd4HkeVaCIIW3qlTd1SforsLnY7oqfPG5V9Bwx0jeAS1s/PBcOQCOPesTWevWgbfgM5nMbs1Q4fB2DUyV6219WGrKCAWvdK1o+I8+7yPKn61+a17F7JjLeNy2Np5THWwG2Kl2Bk0UwHHAexwIcPA94+AQemLzOPzdCrex16tfpWo/Vr2aszZI5mfymOaSHD9YUC6AYqjhtCkr4/dH77XORtv8ApaSX1C1xlJMPPc7+LPs8c/D3BZez9B9E26PUo8jr8Xo6nOyxhYassleOm5pYQGsjc1pb/BtHa4EcD3KuvkzabrFy7k971Cfa9fw1qe3QdqGRu846OdszQ+zHD5Ie5zHHuLjz6h8DwAHQaKqsVqnVjVOmuYpR7viN13V1tsuMyWexfzKpFBzHzDLHXJc4gCXh4PJLm8+5bLMZ/qTiLOiV6uq4rYBc7ItnuVcj82ix7j6YfLXZIC6Vg5mIafaIa0e8oLDRQfG9S7NzqHsGtWdRz2Oo4usLUexT1x9H3BwwuZG8HkvBe4dvHnscfuWt1v5RWg7L0+n3ZubGL1yvb+Yz28xC+mIZ+WjscJAPi9o5Hjz7/BQWUiwamcx1+Om+tfrTsuxCes6OZrhPGQCHs4PtN4IPI8eQs5AREQEREBERAREQEREBERAXEd7cc5F8mTYsn9OZBmWh22Soy587eJ2RjOtj9IP57g30/Y7eeO3x7l24qMzHyXdNz2Qu3Lus+rLcttvzRtvzMhNkOa/1hE2UMbIS0cva0F3kEkEghVZyOc6d9YMhb2+7sdwZW9bdrclPIl2IsMFdzo6MtYfxcrQ1xDu323N57veDG+klTqtvOM03equTD/pKeC7fmn2mWapNWc/+Hgbj/mo
jiIb3Nb2v7muaOXO889DUvk863j90dtcOv/8An0zyWRPJckkZHNICHyMidIY2PcCQXNaD5PlRa70t6adLeoOuW5sU3D7DncjKzEwxTWHQPtuY4yFkLXGGJxaXcuLW+8+UFL0MHNpnTL5SGaxObz0eSpZPI14Jn5aw8x9lStIJRy/xKCePU/S7QBzwArI6jZ6+zfOnNGHI2W07+u5uW1WZO707Dm16xjc9oPDyC53BPPHcePeVcNHpLj8fU2CrDhozWz9mW3k4ZZfVZZkkjbHIXBziAHMY0Fo4Hj3eStBrXybNW1HNUMvjMA+PJUIZK1axPkZ7DooXgB0Q9SV3scNHDfc3z2gcnkNj8kwg/Jv6d8Hn/wAyVP8A7LFbaifTPp/iemesDCYTHNxWPbPJMypHI57GF55Pby49o5/yRwB8AFLEBERAREQEREBERAREQFwnuGQz9bTusG7xbbsEeU1farUeMrMyLxUjhjlhcYnQ/oyNcHuHD+eBx29q7sVT5Lobg8lhNmws2E9TE7FclvZKD53IPnEshb3u7g/ubz2N8NIA493vQUTsBzG8ZLrDl7O45rWpNOkdXxNTG3jXgriOmywLE8Y8TB7nnxJyO1vA496wtQx/2i9eNI2TKWstQyOQ6f1czNXp5KxXjE3rwkx9jXgGLl3tRn2XHy4Eq8tu+Thqu95t2WzmtsuXpGMjnc21JEyyxh5Y2eNkjWTAfASBw+Czts6GYTdr+Ku5bCercxYLaditbfWkiaeOWd0T2ksPaPZPLfHuQQ75Q+Yv4PR8ZZx92xj5TsGJikmrSuicYn3YWvaS0j2XNJBHuIJBUP6pP3HYequ4a3qOenxuQfpNSenGbTo4o5zfmD3N94ZI+NvYJOORy0/AK+do6cwbpgbmFzeLjyGLttDJq8rwA4Agg8gggggEEEEEAjyFVn2P9MKG+W9JfjJH7VnMKJ7Fee7clms0Yp+WvdO55Hc2Tjgl/fwAB7IHAVHleoWVi1zB6brtjZMfm7+z/ROYg2XNltym/wCa+s2vHeDJiGy9rSyRocSHOA7efZ8uq+jbviegXVCLashZZhY6sFrF1m7FPkbEUgLmzMlndFE6SJwcwiN/d5B/Vx0ZF8mvUotRuaydXhmxFywLdhliy+WaScccSmZzzJ3gAAO7uQBwCF6Vvk663U03L6qzBPfhMue6/DNkJpJLB4aOXTOkMnPDGj9L3AIIS2O1qnXPRdbrZbKz4g67k3uiv35bBnkbPW7XyOe4mR7Q9wDnckAkDwq0wmau5rfNNlvX7F/0Opuw1YH2JnSenEyvabHG0knhrR4DR4A9y6U3PozjOoDsfJm8U+xPj3vfVs17klaaEuHDw2SJ7XgOAALeeDwOQeFp4fk16lX12HBRavFFi4MkcvBBHZe0w2ySTLG4P7me8+GkDgkcceEF1j3Bfq/B7gv1AREQEREBERAREQEREBERAREQFC+k1rdrmqPk3+nSo5755Ya2KgQY/m4kPonw53ks4J8+/wCAU0VbdAMVRw2hSV8fuj99rnI23/S0kvqFrjKSYee538WfZ45+HuCCyUREBYGbwOM2XHSY/L46plaEnHfVuwNmidx7uWuBBWeiCIZrpHp2w7BrubyGv1J8prvAxM4BZ8zA44DGtIaAOB44+AWDjOjmKwe1bZseNyeYq5TY4THYLrzpIYH8cCSGJ3LWOHj3fcp6iCpn9Peo+t9LY8HrnUIZTaorfrNzmzUhN6kPJ/gXNZ/UO73+9SC9ld+pb7rFCvg8bktTnquGYy4telPVsBriCyI/pMc4MHjyO48+7zOVW+/4qlc6tdMrk+6vwFupLkDX19svaM53VwHNLe4d3pD2/c7jn4e9BZCIiAiIgIiICIiAiIgIiIChe85TdqGy6dBq+HpZLC2r7o8/ZtPDZKlXs5a+MGRvLu7x4a/+j4qWZFtp+PstoSQxXjE4QSWGF8bZOD2l7QQS0HjkAgkfEL+UvU/5f3WmlvtKllaeHwG
S1XJyizj8c23DBbkbzG6Ky35wfVjBHIHjz55Qf1iRUr8kXqxufW3o/Dt+6YvG4qzeuzChHi4pI45KjQ1oe5sj3nuMgmHv4IDSB8TdSAiIgIiICIiAiIgIiICIiAiIgIiIChlrKbszq7Sx8GHpP6fPxD5rGWc8fOWXxLwyIN9TnsMfnn0z5/yh7lldUchs+J6eZ+/ptand2erUfPRq34nyRTvZ7RjLWPY4lwBa3hw4cQT45C/llY/5SLq1L1Ar7OYcNGYKDsecIxtsY5/L+71nQ/OPMw/RD+f0fHCD+uyKDdENj2ncOk+tZzdKNLGbJkavzuzUx7Hsiia9xdEA17nOa70zH3Ak8O7v6FOUBERAREQEREBERAREQEREBERAREQEREBVd8nLK6vmOnctjUNetaxiRk7jDRt895mEzhJJ5c7w53JHn4+4K0VC+k1rdrmqPk3+nSo5755Ya2KgQY/m4kPonw53ks4J8+/4BBNEREBERAREQFV3UbKatT6ydKamY161k9hty5IYXKQ8+njXNrgzGT2h+mzho5DvI+CtFQzbLW7Q7/pMWAp0p9TlfbGxT2CBNC0RA1vSHcCeZOQeA7x9yCZoiICIiAiIgIiIPOxPHVgkmleI4o2l73n3NAHJKirerOpvaHNzMbmkcgiKTg//AErdbV/zYzH+pzf+Aqu9e/8AQGN/1aL/AMAWNy7RYtxXVTM4zhvw+kubl2WanTTVm44+tK/tX1T8Yj/Kk/dT7V9U/GI/ypP3VpEWpr9ru596PC5Hpue7+P2bv7V9U/GI/wAqT91cJ/LL+T9iurnXTVNk1e6xtLOyR1NhsRxuAqCPgfOSCPa5iBbwPjG0e9y7RRNftd3PvR4T03Pd/H7MrXN20TU8BjcJir8NPGY6vHVrQMik4jjY0NaP0fuAWx+1fVPxiP8AKk/dWkRNftd3PvR4T03Pd/H7N39q+qfjEf5Un7qfavqn4xH+VJ+6tIia/a7ufejwnpue7+P2TLAbTitoZO/F3GW2wODZO0EFhI5AIIHwW2UB6bf84Nq/7yt/9pT5dCrDZNO6YiesRL0lm5pbdNzDDGIkREWC0REQF+Oc1jS5xDWgckk8ABeGRyFfE0LF23K2CrXjdLLK73NaBySqvyU9rcpfnGWY+Kh3d1fEk+wxoPh03B4kefBIPLW+AASC91lNMYZ1U4R53efo2LNmq9OEJjZ6napUldG7PU5Ht8EQSerx/T2crx+1fU/xiP8AKk/dUciiZBG2ONjY2NHAa0cAD9QX0pz7PLPWPC6OoU8yQ/avqf4xH+VJ+6n2r6n+MR/lSfuqPImfZ5Z6x4U6hTzJD9q+p/jEf5Un7qfavqf4xH+VJ+6o8iZ9nlnrHhNQp5kh+1fU/wAYj/Kk/dXAW0/Je1vM/LQhzDZYX9Mbsv09dd6bhG2YOJfU7eOT3y8HgAARvIB5au3ETPs8s9Y8JqFPMkA6r6mBwMvGB/3Un7q/ftX1P8Yj/Kk/dUeRM+zyz1jwmoU8yQ/avqf4xH+VJ+6n2r6n+MR/lSfuqPImfZ5Z6x4TUKeZIftX1P8AGI/ypP3U+1fU/wAYj/Kk/dUeRM+zyz1jwmoU8ySw9UdTme1pz1KEu8D5xJ6IPnj3v4+PhSeORk0bXscHscA5rmnkEH3EFVk9jZGlrmhzSOCCOQQsTGsn1KY2sGzth5LpsUH9kE/3lo90b/j3DgE/pc+CJibVeynGJ9c4x1wjDzuVXMhmIxonFbSLDxGWrZzG179OT1K87e5p44I+BBHwIIII+BBCzFVMTE4S5QiIoBERAREQEREBERAVbdAMVRw2hSV8fuj99rnI23/S0kvqFrjKSYee538WfZ45+HuCslVd8nLK6vmOnctjUNetaxiRk7jDRt895mEzhJJ5c7w53JHn4+4ILRREQEREBERAVb7/AIqlc6tdMrk+6vwFupLkDX19svaM53VwHNLe4d3pD2/c7jn4e9WQqu6jZTVqfWTpTUzGvWsnsNu
XJDC5SHn08a5tcGYye0P02cNHId5HwQWiiIgIiICIiAiIg1e1f82Mx/qc3/gKrvXv/QGN/wBWi/8AAFYm1f8ANjMf6nN/4Cq5wcjIdcx8kjmsY2pG5znHgABg5JK1Mt/oU+36PN/jf6Lftn6NkihA659NyQB1B1Yk/AZqt++vz7dOm3/6g6r/ALarfvriZtXB5jRXOWejU7B1/wAHr97Kg4fPZDEYeV0GTztCkJKNJ7ePUD3dwe7s59ssY4N4PJBBA8di+UPhdfyey1GYPYMtFrjYpcpdx1SOSvXhkgZO2buMg7m9j/IaC4drj28cE1pieif0JsuehudJsD1Cx+XzE2UpbNNPVaWV7EnqOZMJAZCWdzuCwODhx7lNZ+m2bjk65srYxrK+xUoa+GYyWMNn7cY2DtA7vYAeO32u0fH3eVfm248+xuzbsUzx/f1x9+CUbL1sw+Cy9HFY/GZja8lapNyXzbAVRO6Gq48NmkLnNAa489o5LjweAV8/J72zKb10Z1bPZqybmUvVnSTzmJsZefUcB7LQAPAHuAUE1XUt46V7RBlsdqo2atmdexVC9BHkIYJsfaqROZ5Lz2vjcJDyWEkFp4B+O36ObHhOjfSnVtV3jYcHq+x06hNjHZDLV2SM7pHkH9Pgg/ePCiqmM3CnbuYV26It4W9s4x653Tjs7Nq6EUH+3Tpt/wDqFqv+2q376kOt7fgtxqS2sBmsdnK0T/SkmxtuOwxj+Ae0uYSAeCDx+sKmaZjfDTmiumMZhIem3/ODav8AvK3/ANpT5QHpt/zg2r/vK3/2lPl6if00f+tP/MPoWSf29v2R8hERYtsREQQbqjZMjcFi+f4O5d9SZvHIcyJjpAP74jP9S1S2vVGsYm4LKcfwdO76czueA1krHRgn/wCMx/2rVKy7+ijDdhPXGfpg7uRYaLZxVVrHVvM5nrXt2nz61f8AovF/NWQZCJkIjh74pHufMTN3Fry0BnYwn+UGr303r1R3zXZ89iNV2efER1pZ2WfmUZ9d7HhjoY2CUue/nn3Ds9l3tcgheFHC7NqnXLY8rXwBy+v7NDj2vyENuKM0HwNkY/1I3uDnghwcOwH4hRzE9N9zxvyU62pUo343a4oe19aK42J7mfOjJJE2ZhIY6SLuaHA+C/3j4a6+Jrj4/ZKq3X/CnBbZfyGGzuEt6zSGQv4nJVWR2zXLXua+MCQseHem8Dh/gt4PC9aHXrBT3bUGRxuZ19keLmzUE+VqCNlunFwZJIw1znctDmkse1r+HD2VT0vRjYXw9SfoTp5DqtHYNPdjKVCO/XfI62x0nAmLX9ofJ6vhwc4cR8ucCeFZ+0aXnLG8aJlq+GjylTEa/lKluCaeNjHTSx1hFC7knkPMb2kgOA+Pw5Iiq5Mbfl62Js/yhLEPR/ZNxxOn7BXfSoC1T+lKsUcczZGOdHP4lPMbeAXDnvAI9nyvrXOq1vUtX16tn4dm2rbs22W1FiWY6pHejiZwHucyJ7YWRA8Fpc8uPqAck+BBq/SzdbHTzqfhcfrdnW8HlcMK2G1a3l4rfpXC1/qGF4eWQwu5YAwuA5BPDR4U/wBo1rZta6g67u2CwX1ldFgpMFfxcduKvOwGSOVksbpHBjuHMc1wLh4II54RETXO2fO1m2/lE61BhNdyMFLMXnZy/LioKNaoDahtxteXwTRucCxwdGW/EAkEkN9peOU+UPRxVnJ1ZNP2qW/iqMWRyFSCrA99SCQPIc8iftJAjdy1ri4/AO4PES1jpHtdHL6XmMhRhbbl27JbJlq8Fhj48eyxWmYyMOJHqEExgloPtEn3eVKb2iZybd+rWQZR5qZ3X6VLHSetH/DzRx2mvbx3ct4Msfl3A9rwfB4MoquTHng22odccNuGdxWNjxeZxjczUfexFzJVWxQZGFgaXOi4eXAhr2u4e1pIPICsVUhQ0vL679iWRycEVChqODsRZyzYtRMjpH5hHH7Ti7ggOY4FzeQOOSePKncXWbp5kJGVYd8
1qaWciJkcWYrl73HwA0B/JJ58cKFlFU4fnRj/AMpbXpJYpq2F2G5hJ8lFia+ehpN+Y2LD5xDwx5eHFoeTy/t7T2ngk8A7u/1sweO0vctmkq5B1DVblijdjbGz1ZHwloeYh38EHuHHcWn7wFRGYxW49PemGm6HmdYaMZhtnxVeDZK96Ew2oRkGGJ3o8+o15BAII45BPPwW+37p/v0ejdV9Ow2pjMt2bJWclRygyMEMIZOIy6NzXuDxI0tcB47T45cFKjSV7fZw7VrbR1sxut5jIY+vg89sLsYxsmTnw1Ns0VEOZ3gSFz2ku7CHdrA5wBBI8hY1nr5hZNkpYPDYjNbNdu4mvnK7sTBG6KSpM5zWyd8kjA3jt5Idx+k3jk8gVxn+jNnD9Rdtyc3SzD9S6WfnjvVLluetFNRl9Jsb4ZTMOfT5YHAsDuOT7JKsbWNCta/1nu5WriYcdrY1aji6oruYI45IrE7nRNYDyA1r2ee0Dz49xRnFVyZ/dnQ9a8FLp0WwfNr7A/LjBfRro2fO23Da+bekW9/byHe1+l+h5/Up+uc8bqwzHyqsjTx92vc1XH+ntN2rA8PFfMOjfTax/B9lxY0y8Hz3DldGKFluqascWw6aWTXy2x4oH+CjfDfjbxwGCYOa4D+l8L3f0vKnygPTSqbGW2PKgfwUj4aEbueQ8QhznEf0Pme3+lhU+W5e/V+0fKHn8ow0tWAiIqGuIiICIiAiIgIiIChfSa1u1zVHyb/TpUc988sNbFQIMfzcSH0T4c7yWcE+ff8AAKaKtugGKo4bQpK+P3R++1zkbb/paSX1C1xlJMPPc7+LPs8c/D3BBZKIiAiIgIiIChm2Wt2h3/SYsBTpT6nK+2NinsECaFoiBrekO4E8ycg8B3j7lM1W+/4qlc6tdMrk+6vwFupLkDX19svaM53VwHNLe4d3pD2/c7jn4e9BZCIiAiIgIiICIiDV7V/zYzH+pzf+Aqu9fHOAxoPu+axf+AK1LEEdqCSGVgkikaWPYfc4EcEKKt6T6mxoa3DxtaBwAJZOB/8AUsblqi/biiqqYwnHdj9Yc3Lsj1ymmnOww9SO/R9X/Nofywn0fV/zaH8sKR/ZTqn4Qz82T95Psp1T8IZ+bJ+8tTULXeT7seJyPQs958Pu0YAAAA4AX6t39lOqfhDPzZP3k+ynVPwhn5sn7yaha7yfdjxHoSe8+H3aReUtSCZ3dJDHI73cuaCVIPsp1T8IZ+bJ+8n2U6p+EM/Nk/eTULXeT7seI9CT3nw+6OfR9X/NofywvWKGOAERxtjB8kNAC332U6p+EM/Nk/eT7KdU/CGfmyfvJqFrvJ92PEehJ7z4fdrum3/ODav+8rf/AGlPlqsBq2K1dk7MXTZUE7g6TtJJeQOASST8FtV0KsNkU7oiI6REPSWbeit028ccIiBERYLRERBj5DH18tQsUrcTZ6tiN0UsTvc5pHBCrDJV7WmyfN8q90tDu7a+VI9l7T7mzcDiN48Ak8Nd4IIJLG2uvxzQ9pa4BzSOCCOQQrKaowzaoxjzu8/RsWb1VmcYVhFKyeNskb2yRuHLXNPII/UV9KT2umWq25XSOwNKN7vJdBH6XP8AT2cLx+yjVPweP82T95TmWeaekeJ0dfp7aUeRSH7KNU/B4/zZP3k+yjVPweP82T95MyzzT0jxJ1+nlR5FIfso1T8Hj/Nk/eT7KNU/B4/zZP3kzLPNPSPEa/Tyo8ikP2Uap+Dx/myfvKkMrrePh+WVgtWZC9uvT6ZPfkx4mf6brAthgkI594b4TMs809I8Rr9PKs1zQ9pa4BzSOCD7ivIUazSCK8QI9xDApP8AZRqn4PH+bJ+8n2Uap+Dx/myfvJmWeaekeI1+nlRPJYijmYY4chSr3oo5WTsjsxNka2Rjg5jwHA8Oa4Ag+8EAhZakP2Uap+Dx/myfvJ9lGqfg8f5sn7yZlnmnpHiRr1HKjyKQ/ZRqn4PH+bJ+8n2Uap+Dx/myfvJmWeaekeJOv08
qIY/D4zAttuo0amObYmfasOrwtiEsrjy+R/AHLifJcfJ+K+sa+fbpjWwb+6HktmyoZ3wQ/eGH3SP+HaOQD+lx4BmMPS/U4XtccDSmLfd84j9YDzz7n8/FSeONsUbWMaGMaA1rWjgAD3ABTEWqNtOMz64wjpjOPnepry6ZjCiMFFfKK+UDS+SXpGGsRafk9gozmSBklR4jr13t7SBPMQe10heSD2nuLXn+nhPfv+U76r7LK5muw4rUK/8AkmvWban/AK3TBzT/AFMC/qtmsLj9jxNvF5WlBkcdbjMNipajEkUrD72uafBC4U3/AP5N+vgOs2m7PolaHJ6eM7SlzOt5J7XiCsJ2OmLDKeJYuwOBjcS74Dv7uBVMzM4y5btPpzmq2e0nDz19oo7o+OtHDYzuPMXpXZ2sAklDYiWMLncu7GnhvdwpKq42L5POg7FoEmlnAxYnXH2heFPDPdRayfnnvb6Rbx5+Hu93jwsvJdMrs+66vm8duGaxONw1f5rNgIZGvp3owCGmQEc945b7XPuaB95UCeIq8xmP6l4rI7xbuZXC52lKx8us45td1d8L+HlsViQc9zefTHcPP6R+5azK9VNx0rpjis9sfTrJZTYprRrXcFqLxkHwM5k4maT29zSGMPHvBkA+BQWsiht3q3reN6m43QLVmxDs2SqG7VgNSV0UjAJC4eq1pY1wETyQ5w8ccc8hbfA7vru1WMnBhs7jcrPi5nVr8VK2yV9SUFwLJWtJLHctd4dx+iUG7REQEREBVd8nLK6vmOnctjUNetaxiRk7jDRt895mEzhJJ5c7w53JHn4+4K0VC+k1rdrmqPk3+nSo5755Ya2KgQY/m4kPonw53ks4J8+/4BBNEREBERAREQFV3UbKatT6ydKamY161k9hty5IYXKQ8+njXNrgzGT2h+mzho5DvI+CtFQzbLW7Q7/pMWAp0p9TlfbGxT2CBNC0RA1vSHcCeZOQeA7x9yCZoiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiIC52zX/T41z/ANwbP++hdErnbNf9PjXP/cGz/voQdEoiICIiAiIgIiICIiAiIgIiICiFvpFpdvD7JixrWOqUtkY5mYZRhFV14ODgTI6Ltc5x7ne1zz5PlS9EFYZfoTVGn6xrerbPsGkY3AWBLAzEXXEzx8kmGZ0nc6RntHwXf/wFvm4TdGdTnZP6zU5NIfU9P6BdjwJ45wP4wWA7kgnyQR93H3qYogq7Gbr1IxGm7Pldm0etbytCxxjMXr14SyZCDkAO5fwGuAJ8H+Sf1LJy/XjAarS0h+zU8rr13bZGV6dCzSfLLDO4sAim9LuDHd0jR5PHP9BVkIgj1PqDrV/cr2pV87Rl2ejE2eziGzt+cxRkNIeWe/t4ezz7vaH3qL/J/wAXRxGgyQY7dX79WORtyfSz5fUIc6Ukw89zv4s+z7/h7gprHquFhz8+cjxFGLNzw/N5cnHWY21JF7PsGUDuLfYZ4549lv3BUP0X6baNuvQbM4LQa2zdMcJfzEznvx+RfFkIrEMjGukjme6Qta70GeOfI593JQdGIq8yWl7tVyGjx6/u/wA3weHjZBmK2UpNtWcuxoYO91gnuZIQ13JA8l5J+C98bk+odbc9o+lsThZtQgr+rhX42aQ353gDmOZr+Ggk93Bb48j9ZQTxFU32/fQPSz66bhpmw6v22/mkuJ+bfO7UfngSdsfvj957vuCmMnU7Vq2dwuEt5ynRzeZrizQxtuURWLDCOfZY7gk+D49/goJQi8K92vbfKyCxFM6F3ZI2N4cWO+48e4/qK90BVvv+KpXOrXTK5Pur8BbqS5A19fbL2jOd1cBzS3uHd6Q9v3O45+HvVkKruo2U1an1k6U1Mxr1rJ7DblyQwuUh59PGubXBmMntD9NnDRyHeR8EFooiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICp3q30Hyu57t
S3nUN3t6RudLGPxMVsUK96s+u6T1C18UrfeXf5Qd4Hw++4kQc1jqb196T+xunT2l1Iw8fh2Z0eYtthv3upyeXu/UzgfrUx6ffK46X9RLv0bBsTMFnWu7JMLsMZoW2P/kdsnAc79THOVxqIdQOkWl9VKXzXbdYxuejDe1j7ddpljH/AFJB7bP6WkIJcDyOR5C/Vzh/5KOf6dH1ekPU/OajCzyzAZk/SuL4/kNZJ7UYPxcC4r6HW/q90w/g+pHSt+wY6Pw/YOn0puM4/lOqSEStA95PPHv4CDo1FWfTX5SXTbqzK2tru1Upsnz2uxVsmrca74t9GQNeSD4PAI/WrMQEREBERAREQEREBERARFjZLJVMPjrV+/aho0asT57FqzII4oY2guc97iQGtABJJ8ABBkosbH5Kpl8dWyFC1DeoWomz17VaQSRzRuAc17HN5DmkEEEeCD4VQDZLvyldPgt6Ds2a0bHU896VnJS4vsmyNeA8vFf1Pcxz+G9xH+Q9rm+9pDdZrqaze7W86R07ztOvv2CrRh9m9UkmqVJZCeGuI8OeAD488EjkO4c1TzWqORxmvYypl8iMxlIK0cVrINgEAsyhoDpPTBIb3Hk8A8DlZsNSCvLPJFDHHJO4PlexoBkcGhoLj8T2taOT8AB8F6oCIiAtdd13E5LJ0slbxlO1kaRJq25q7HywEggljyOW8gkeCPeVsUQQSl0S1DD39yyWHx0mFzG2xOjyuTo2JG2JCQ/+EYSSI3gyOILQPPHv4Cxvk/5DX8n0pxE+r7Xmd2wnfOyDN5+WSS5OWzPDg90jGOPa4OYOWjw0e/3mxFXXyf8ALfTfSvFXPqD9mXfLYH1Z9D0Pm3E7x3dnpRcepx6n6A57+fPvIWKoZtlrdod/0mLAU6U+pyvtjYp7BAmhaIga3pDuBPMnIPAd4+5TNVvv+KpXOrXTK5Pur8BbqS5A19fbL2jOd1cBzS3uHd6Q9v3O45+HvQWQiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgr/qZ0C6fdYK8jNs1XH5Sw5va296Qjtx/d2zN4eOPu548e5VS35OXU/pZ/CdKurNyxQj/Q1zemfP6vHwY2do9SJo+5o/rXSyIObW/Kf3Tpt/BdXOlOWw9Vnh+x6sfpTHcfy3tb7cLf1O7j+pW505616J1bqifUNpxucPb3uggmAnjH3vhdw9v/AMTQpsubvlcdIul2P6V7ZvuW1mjS2HF0nz0ctQjlr2HXnexWDnVy17+6Z8bfaPA7uSQByA3nTf5YGmdQ+s+2dM/Ts4bYMLkJ6NX585gZkvRPbKYyD4cHtl4YfJY1r+eS5jL1X8GtT6RdTMjeqXdc03aJ7UMgmgtY/G2O6N7SCHte1vskHgg8j4L+w/yWtu6h7f0qrTdTdemwGz1JjUe6w0RvvMaxjm2DGBwwnuLSB/lMceGggALeRQrOdQJBampYKoy/NC90c1yd5ZWiePBaCBzI4HwQ3wDyC4EELRyZnbZj3OztSAn/ACa+OAaPv/Se4/8AzV2jw/XVEe3H6RPxbVGTXa4xiFooqr+k9s/0jj/2fH+1PpPbP9I4/wDZ8f7VOZR3kf7fws1K6tRFVf0ntn+kcf8As+P9qfSe2f6Rx/7Pj/amZR3kf7fwaldWouGP+VD62/V3R8V03x1gNv51wu5FrT7TKkb/AODaf+3K3nn/APZI+K6T+k9s/wBI4/8AZ8f7VRXUz5IWv9XN+t7ftGUs5bK2WxsfDP3trhrGBjWtZHIwtHjkgOHkk+8qMyjvI/2/g1O659+Qf19yuwdUtM0rdt9yNLA4WvNHrtJ1n0obVp/bHFUmd28yNDHSek17hwQ1jfexq/qUBx7lzt0v6RdMOl92lz03wOLtQyNfWzscXzsRSBw7SZJy6WF3PHDgXAfFwPC6KWFVE072rXbqtzhVAiIsFYiIgIiIChfR/F7thtAx9TqHmKWe2xj5jav
49gZDI0yvMQAEcY8RlgPsDyD7/eZoqp+S/i9Jw3RbC1OnmYu57U2TWjVv5BhZNI42ZDKCDHGfEheB7A8Ae/3kLWVXdRspq1PrJ0pqZjXrWT2G3LkhhcpDz6eNc2uDMZPaH6bOGjkO8j4K0VDNstbtDv8ApMWAp0p9TlfbGxT2CBNC0RA1vSHcCeZOQeA7x9yCZoiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgKG9Rs1NBDSwtSV0NnJl/qSscWvjrsA9VzSPIcS9jARwR38g8gKZKttz7/ALQ63dz6f0Uez7ufW9r/AP0V1rZM1cImfPzbOT0xXdiJY8EEdWCOGGNsUMbQxkbBw1rQOAAB7gAvtFyvi85sGsdB926kfWDNZbP0bWWhoQ270slWrGLskTSYee1/ZwXAvDiAA0cNAC1d70FVeZ54OqFh4fNUNhxsOQxlyDIUJwTFZrPD45ACQS1w8Ecg+QqO0DQeocGx0JbuSsN1i9Tniyjpdvnycs4fEfSmrE1ovQeH8HmNzRw7wOQFXurY25qnyMcVk8Dn8zjsrkrGNi+cjIzSCsTkmRkQsc4tjBD3BzWgB3ucCoV6Wd+HH4Oq8xs2NwFzFVb9kwT5Sz8zpt9N7vVl7HP7eWghvssceXcDx7+eFtFSG5a7Noe8dKmY7YdjmZcz01e3HdzNidlljqczyJGOf2kB0bSBxw3zwByq81M9V+qeLn3DCXfmuTflLDIPX2eWKpVZDZdH83kx4quYR2M4Jc8vPd3dw5ADBM3ZicJh1ki5t2DPbFR2/OdJ48xkxks9nK9/GZIWpPnFbESh01r05OeW+k6vNE3z49aID4LS1vtP6rX9yy+AvSUrmNzdzF4931nlqQUfm7+1jZaTar2S8gBzu95Lg/wWjjhgTe7Ih1VJG2WNzHtD2OBa5rhyCD7wQtx05yr4JruvzP7xTYyamSST83dy0MJPvLHNI/7LmLTV/U9CP1u0Tdo7+z9Hu488fq5X3rXf9pWP7Oe36Kuep93Hq1u3n9fPPH9a2bP5s6id0xM9Nv2/dTldMVWpmexZyIirefEREBERAVdfJ/y3030rxVz6g/Zl3y2B9WfQ9D5txO8d3Z6UXHqcep+gOe/nz7zYqhfR/F7thtAx9TqHmKWe2xj5jav49gZDI0yvMQAEcY8RlgPsDyD7/eQmirff8VSudWumVyfdX4C3UlyBr6+2XtGc7q4DmlvcO70h7fudxz8PerIVXdRspq1PrJ0pqZjXrWT2G3LkhhcpDz6eNc2uDMZPaH6bOGjkO8j4ILRREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAUO6i4WaxDSzNSJ09nGF/qRMaXPkrvA9VrQPJcCxjwByT2cAckKYos6KsyrFnRVNFUVR2KugnitQRzQyNlhkaHskYeWuaRyCCPeCFpsLpGDwGv2cHSx0bcTZksSTVJi6Zkhne583d3k8hznuJHu88AceFNc50+kNqa7grTKE0z3STU52F9aV58lwAPMbifJLfBJJLSSStG/DbbCe12DqTkf5VfIgtP8AeY0//JToc7bRMYeuYj5/R3aMqtVxjM4Sh2k9GtP6dZB97X8S6jZdCazXPtzzCKIuDjHG2R7hG3lrT2sAHgfcsWv0H0apWyVaHCOiqZCzHbnqsu2BD6scwnY5kfqdsf8ACNDuGBoPxBHhTj6M2v8A0cj/ANoR/sT6M2v/AEcj/wBoR/sTV6+Me9T/ACy0uT7sYYGa1XF7Dfw93IVfnFnEWTcpP9R7fSlMboy7hpAd7L3Dh3I8+7nhRmXobo8u2O2T6CbHln2m3nviszRwyWGkFszoWvEbnggHuLSeRzysuzvOQq9TaehSYB/1gt4x+WiYLbPT9Bsnpkl/Hg93wUq+jNr/ANHI/wDaEf7E1evjHvU/ymb1id8wxJtcxk+
w1s7JTjdl61aSnFbI9tkMjmOez+gmNh/VwePeeYrmehmj57Z5NhuYJrsrK+OWaSKzNFHO9nHY6WJjwyRw4HBe0nwFNfoza/8ARyP/AGhH+xfTcRtsvAZgKsbj8bGSDWj+trHH/wCSavXxj3qf5Jv2J3zD6kkZDG573BjGguc5x4AA95JW46c4p8813YJmdguMZDTBBB+bt5cHkH3F7nE/9kMX5iOnlixNHY2C5HaDCHNx1RhbW7geQXl3tScfAey372nwVOVOy1ExE4zLnZVlMXIzKNwiIqnNEREBERAVU/Jfxek4bothanTzMXc9qbJrRq38gwsmkcbMhlBBjjPiQvA9geAPf7zayrr5P+W+m+leKufUH7Mu+WwPqz6HofNuJ3ju7PSi49Tj1P0Bz38+feQsVQzbLW7Q7/pMWAp0p9TlfbGxT2CBNC0RA1vSHcCeZOQeA7x9ymarff8AFUrnVrplcn3V+At1Jcga+vtl7RnO6uA5pb3Du9Ie37ncc/D3oLIREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERBztnP+nvrP/uFZ/wB8C6JXO2c/6e+s/wDuFZ/3wLolAREQEREBERAREQEREBQvo/i92w2gY+p1DzFLPbYx8xtX8ewMhkaZXmIACOMeIywH2B5B9/vM0VU/Jfxek4bothanTzMXc9qbJrRq38gwsmkcbMhlBBjjPiQvA9geAPf7yFrKruo2U1an1k6U1Mxr1rJ7DblyQwuUh59PGubXBmMntD9NnDRyHeR8FaKhm2Wt2h3/AEmLAU6U+pyvtjYp7BAmhaIga3pDuBPMnIPAd4+5BM0REBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQcs9Zt2xPSD5XOsbvt0lrE6l9UZ8Ycx8xnmrttOtdzYnOjY7tcW+fPHhdGaluuA33Dx5XW8zRzmOk91mhYbMzn7iWk8H7wfIW0t1IL9aWtZhjsV5WlkkMrA5j2n3gg+CP1KgNt+RXp9jMSbBoGRyfSnaD5F7WJjFXkP3SVuexzP+q3sB+KDoVFzB9ofX7of7G6anW6t63F787qLRDkmN/lSVDwHu/VGAB8XKy+lPynenHWR4q6/sUMeZBLZMLkQat6Nw97fSfwXEfEs7gPvQWoiIgIiICIiAiIgKuvk/wCW+m+leKufUH7Mu+WwPqz6HofNuJ3ju7PSi49Tj1P0Bz38+febFUL6P4vdsNoGPqdQ8xSz22MfMbV/HsDIZGmV5iAAjjHiMsB9geQff7yE0VZbpRx+a619PmDdZMPl8RDev/VqF5Ay0EkYi7pAHAERu9och3nnjjglWaqw6TSfX3IZHe83oR1HZo57WDrWLgPzyxjo5+Y3PBaCwOcCe3yPHIcQ4ILPREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQFWnVb5OPTvrQwv2nWqtnIAD08rWBguxkfokTM4cePgHEj9SstEHMH2T9c+iPt9PN3i6ka7F+jrW7H/wDFsZ/JiuN45PHgB/a0fcVtda+WprdXLQ4Dqbg8r0m2N/siHPxE0pj8TFaaOxzf+s4NH610UoTu2a0DO0bGD2ebC5es/wATY641lkc/9aPh3B/q5WdNFVc4URimImdz76hdXdZ6a9Msnv2TvfOtcowNn9fHAWDP3OayNsfaeHF73NaCSGgu5JABIzunPUTBdVtMxm063cF3E5CP1In+5zD7nMeP8lzSCCPgQv5n/LJ6LYfFZbE4bo5h9jnwVnvyOUoV5ZZcUyxyWQmJjySJQ0zd3PgNkYG+9wGH8jre+qHycN09C9qmcuaRk5AMlQZWc4xO9wsRD4Pb45A/SaOD5DS23V73JPSWWZVwf1hRRL7V9T/
GI/ypP3U+1fU/xiP8qT91NXvck9JMyrglqKJfavqf4xH+VJ+6n2r6n+MR/lSfupq97knpJmVcEtVU/Jfxek4bothanTzMXc9qbJrRq38gwsmkcbMhlBBjjPiQvA9geAPf7zKftX1P8Yj/ACpP3VA+h/V7WLHTbGyTa3V6ayGSfnWq8XsVv4Z/tDsiY32/4zw0fp/H3pq97knpJmVcEm6x+vsuGGk4Lea+k7hmWetQsDh9p0UMjHTGFnc0k9vgnzwCTweDxP4IzDDHGZHSlrQ0yP47ncD3ngAcn9QVHaL1I07dNxye6ZzXfq5sWLls4HG37pe+a1jxIHtmawN4ja93PAI7hwfPB82R9q+p/jEf5Un7qave5J6SZlXBLUUS+1fU/wAYj/Kk/dT7V9T/ABiP8qT91NXvck9JMyrglqKJfavqf4xH+VJ+6n2r6n+MR/lSfupq97knpJmVcEtRRWLqnqcrg052pDz8Z3GIf2uACk1exFbgZNBKyaF45bJG4Oa4feCPeq67ddH66Zj2sZiY3vRERVoEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQERRTqhdfV021FG8xyXZYaIc33hssrY3Ef0Nc4/1LOinPriniypjOmIhGcxnZd6LvTmfDrp/iWwSFjrw/nHuaQfTP+SwH2h5dyCGj5q1IKMIirwx14h7mRMDWj+oL0jjbExrGNDGNADWtHAA+4Kv9861YnQ8xNi3YrM527Vo/SV2PDVWzfM6vJAllLntHktfw1vc49pPHCxruTXsjZHDz2vSUUUWKcIWCiqpvyhsXYu4PH0Ncz+XymYwUOwV6ePhgkIrSHgdzzMGAjxzy7jyACSeFsc31roa3s8WKymu7FRoyXYse3PTUWjHmeQhsbe/v7uC5wb39nbyeOVSs0lPFYiKketPygW6zru80tZx+ayGYwmPl9bMY+iyanjbJiLoxK554JHLXODWvDQfa48qSXOs9bD/ADPHRYXO7VmI8fBcyEeCpsl+atkby0yFz2AF3DiGN5cQOe3jhEaSnHDFZSLV6ts2O3PXcfnMRYFrG34WzwS9paS0/eD5BHuIPkEELQb31Tx2i5LGYr6OymfzmRa+SvisLA2awYmcd8ru5zWsYC5o5c4ck8DkozmqIjFM0VF9PuvzvqMzJZ6DKZbM5PYcljsXh6lJgvSMink7Y/T9hrfTjb7Tnkcce07n3yj/AMoLWWaxLlZa2Vgux5IYd2BfU5yJukdza4iBILi09wId29vnu4RhFymYxxWYigEPWKqyDBuyWuZ/Bz5fMNwkFXJVo2SNmdC+UPd2yOaY+2Nw7ml3nxx4PHjtPXXAajb2SC7UycjsDYx1a0a1dsne644Ni9Nod3O4J9occ/yQ5E59OGOKxUVQz/KRoVbWZpzaXt8eQw0LbeQp/MYS+vWcCWzlwmLHNIa7hrXF/su9nwVvcx1rxFOTGQ4jGZjbbWQoR5RlbBVmyPjqSfxc0he9jWh3B7Rz3HtPAPBRGkp4rBRVe/5QmAsfV5mJxebz1rO1bNmnVx9RvqA15GxzRSCR7BG9rnEHvIbyxw7uS0O0O4/KIlg1/Rsxq+ByOUgzee+irlZ0EbbNcx+oJa5a+VgbOXxkDklvDH8kctJE3KY7V3EBwII5B94Kwqlexrlh13BFlaYnulpOJbWsgf5Lmjwxx+EjRyDxyHAFphL+tNM7dFrcGt7DdybIak2QFWrG9mL+cc+m2w4Se/wSTH3gBpPPA5Vhqyi5VbnYTFF2Jidqe4DOVtjxUN6r3NY/lro5AA+J7SQ5jgCQHNcCDwSPHgkeVsVXnT+yae25rHg8Q2a8N5rOOAJOXRyH+sNi/s/WrDV1ymKZ2bp29Xm7tGjrmkREVSoREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBERAREQEREBRTqfSfa021LGwySUpYbwa
33lsUrZHAfrLWuH9alaLO3VmVxVwZUzmzEwrCORssbXscHscA5rmnkEfAgrmfr5HlcR1Tt2sJNLUhyuutqZf5lksbHPNCJZQ1wjtysMZa1zwJW94PeRwCzz0tmMDLoznelC+bXR/EugjL3UR/Nua0E+mPg4D2R4dwAHHQZfTtS3t1e7lMHhdiMPiGxbqQ2uz/ALLnA8f1LGu3NG2NtPHz2vRRVTlFGNMqy6P4aHK7hrW367jp6ujyaNVxlB9pzBKxzLBc2Jze4u5DOCXeWn4EqA730c3DN3djll0pmxZ87CzKUdlsZSEcUI7LJY6tdj3d0TgxvplpDGE9zi88rquKJkETI42NjjYA1rGDgNA9wA+AX0qWU2omnCXNu2aPv+F1zq1qWH1JuxY/bZr1+hlYslBB6L7UQD4ZY5HB3LXA9pbyCC0Et+HlmuitzFb9ks9c6Z4vqVTzWPos9OzNWZPjLMEAhc3mbwYnhrTywkgg+yV0uiYk2aZ7UCx+96F06xlLX72a1fTbNWBhfgzkq8IqFw7y0NJb45cSD2jnnn4qI5qxkLnVDG9R9CrUOoWJlxMmAvQYvKV2vgImEzZGSOd6bvJIc3uB/RPlXNJUglcXPhje4/FzASvuOJkLe2NjWN9/DRwEZzTM7MXNmt9NN41tmt7Z9W2Wc3ic/nLc+vtvwh8tW/I4h0UxPp97eIyA4t5BcD2nwv2XpRvM2ek6ljD1m7P9ZG5ZmrPus4NRtL5n6ZmHMYnLfb55LQfHK6URMWGhp3Y+eKmd2qbrveF1rNfUx+Nyeu7JBlI8LLk675rddsMkb+HtPpMf/DOIaX8Hs/SHPAiGd6db1t2S3rKT6x9GOy+X1y1TquvwSP8AQqTNMxeQ/tDmtBJHJ59zS5dKoiZtRVvnzuVRc0fNy9Q+puTbS5o5nXadCjL6rP4adgtB7OO7lvHqs8uAHte/weKtqdCMrgJ9Uy2V6c47qCx2p4zD3sVbnq+vjbdaLtLo3Sn03Md3Fru13PLQRyF1SiYk2qat6n8B05uYrqH0/wApQ1alrWGx+DyENyjj5YjDTszyV3iNob2l/JZIe5rePHnjkKLT9L9vx+px2KuFbcyeM6h29mhxhtxMdbqPnn7ex5d2NcWTBwDyPdweCuiUTFOijz59Sgeoet7jtm0YbMYLRrGt7M35p/8A5LHmoGthhEodPXtQsd/DsDe4AAPHLuQW/G/kJDQSTwB5JKwqlixsdh1PBdlmUHtluuBdWrA/5TnDw9w+EbTyTxyWAlwsooqubic21E1VS3HT6sbm25rIAcw1q8NFr+eQZOXSSD+oOi/t/UrDWvwODra7i4aNXuMcfLnSSEF8j3Elz3EAAuc4kngAefAA8LYK65VFU7N0bOjzl2vSVzUIiKpUIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgIiICIiAiIgKO5Hp5rWWsus2cHSfZceXTtiDJHH7y5vBKkSLOmuqicaJwTEzG5Evso1T8Hj/Nk/eT7KNU/B4/zZP3lLUVusXueesss+riiX2Uap+Dx/myfvJ9lGqfg8f5sn7ylqJrF7nnrJn1cUS+yjVPweP82T95Pso1T8Hj/Nk/eUtRNYvc89ZM+riiX2Uap+Dx/myfvJ9lGqfg8f5sn7ylqJrF7nnrJn1cUS+yjVPweP8ANk/eT7KNU/B4/wA2T95S1E1i9zz1kz6uKJfZRqn4PH+bJ+8n2Uap+Dx/myfvKWomsXueesmfVxRL7KNU/B4/zZP3k+yjVPweP82T95S1E1i9zz1kz6uKKxdLdTieHHBVJuPhO0yj+xxIUmr14qkLIYImQwsHDY42hrWj7gB7l6Iq67ldf66pn2sZmZ3yIiKtD//Z",
      "text/plain": [
       "<IPython.core.display.Image object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.display import Image, display\n",
    "\n",
    "# Visualize the compiled LangGraph workflow as a Mermaid-rendered PNG.\n",
    "# draw_mermaid_png() relies on optional rendering machinery and can fail\n",
    "# (e.g. missing dependencies or an unreachable rendering service), so the\n",
    "# call is guarded: a failed render should not abort a Run-All of the notebook.\n",
    "try:\n",
    "    graph_png = app.get_graph().draw_mermaid_png()\n",
    "    display(Image(graph_png))\n",
    "except Exception as e:\n",
    "    # The diagram is optional eye-candy; report the problem and continue.\n",
    "    print(f\"Could not render graph: {e}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---ROUTE QUESTION---\n",
      "What is the AlphaCodium paper about?\n",
      "{'datasource': 'vectorstore'}\n",
      "vectorstore\n",
      "---ROUTE QUESTION TO RAG---\n",
      "---RETRIEVE---\n",
      "\"Node 'retrieve':\"\n",
      "'\\n---\\n'\n",
      "---CHECK DOCUMENT RELEVANCE TO QUESTION---\n",
      "---GRADE: DOCUMENT NOT RELEVANT---\n",
      "---GRADE: DOCUMENT NOT RELEVANT---\n",
      "---GRADE: DOCUMENT NOT RELEVANT---\n",
      "---GRADE: DOCUMENT NOT RELEVANT---\n",
      "---ASSESS GRADED DOCUMENTS---\n",
      "---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---\n",
      "\"Node 'grade_documents':\"\n",
      "'\\n---\\n'\n",
      "---TRANSFORM QUERY---\n",
      "\"Node 'transform_query':\"\n",
      "'\\n---\\n'\n",
      "---RETRIEVE---\n",
      "\"Node 'retrieve':\"\n",
      "'\\n---\\n'\n",
      "---CHECK DOCUMENT RELEVANCE TO QUESTION---\n",
      "---GRADE: DOCUMENT NOT RELEVANT---\n",
      "---GRADE: DOCUMENT NOT RELEVANT---\n",
      "---GRADE: DOCUMENT RELEVANT---\n",
      "---GRADE: DOCUMENT RELEVANT---\n",
      "---ASSESS GRADED DOCUMENTS---\n",
      "---DECISION: GENERATE---\n",
      "\"Node 'grade_documents':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n",
      "---GRADE GENERATION vs QUESTION---\n",
      "---DECISION: GENERATION DOES NOT ADDRESS QUESTION---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---TRANSFORM QUERY---\n",
      "\"Node 'transform_query':\"\n",
      "'\\n---\\n'\n",
      "---RETRIEVE---\n",
      "\"Node 'retrieve':\"\n",
      "'\\n---\\n'\n",
      "---CHECK DOCUMENT RELEVANCE TO QUESTION---\n",
      "---GRADE: DOCUMENT NOT RELEVANT---\n",
      "---GRADE: DOCUMENT RELEVANT---\n",
      "---GRADE: DOCUMENT NOT RELEVANT---\n",
      "---GRADE: DOCUMENT NOT RELEVANT---\n",
      "---ASSESS GRADED DOCUMENTS---\n",
      "---DECISION: GENERATE---\n",
      "\"Node 'grade_documents':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n",
      "---GENERATE---\n",
      "---CHECK HALLUCINATIONS---\n",
      "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\n",
      "\"Node 'generate':\"\n",
      "'\\n---\\n'\n"
     ]
    },
    {
     "ename": "GraphRecursionError",
     "evalue": "Recursion limit of 30 reachedwithout hitting a stop condition. You can increase the limit by setting the `recursion_limit` config key.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mGraphRecursionError\u001b[0m                       Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[16], line 5\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[38;5;66;03m# Run\u001b[39;00m\n\u001b[0;32m      4\u001b[0m inputs \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquestion\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWhat is the AlphaCodium paper about?\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[1;32m----> 5\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m output \u001b[38;5;129;01min\u001b[39;00m app\u001b[38;5;241m.\u001b[39mstream(inputs,{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrecursion_limit\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m30\u001b[39m}):\n\u001b[0;32m      6\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m output\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m      7\u001b[0m         \u001b[38;5;66;03m# Node\u001b[39;00m\n\u001b[0;32m      8\u001b[0m         pprint(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNode \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
      "File \u001b[1;32mc:\\Users\\zhaoy\\anaconda3\\Lib\\site-packages\\langgraph\\pregel\\__init__.py:1014\u001b[0m, in \u001b[0;36mPregel.stream\u001b[1;34m(self, input, config, stream_mode, output_keys, input_keys, interrupt_before, interrupt_after, debug)\u001b[0m\n\u001b[0;32m   1012\u001b[0m         \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[0;32m   1013\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1014\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m GraphRecursionError(\n\u001b[0;32m   1015\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRecursion limit of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrecursion_limit\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m reached\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   1016\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwithout hitting a stop condition. You can increase the \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   1017\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlimit by setting the `recursion_limit` config key.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   1018\u001b[0m     )\n\u001b[0;32m   1020\u001b[0m \u001b[38;5;66;03m# set final channel values as run output\u001b[39;00m\n\u001b[0;32m   1021\u001b[0m run_manager\u001b[38;5;241m.\u001b[39mon_chain_end(read_channels(channels, output_keys))\n",
      "\u001b[1;31mGraphRecursionError\u001b[0m: Recursion limit of 30 reachedwithout hitting a stop condition. You can increase the limit by setting the `recursion_limit` config key."
     ]
    }
   ],
   "source": [
    "from pprint import pprint\n",
    "\n",
    "# Run the adaptive-RAG graph, tracing which node produced each state update.\n",
    "inputs = {\"question\": \"What is the AlphaCodium paper about?\"}\n",
    "\n",
    "value = None  # state emitted by the last node that ran; None if nothing ran\n",
    "try:\n",
    "    for output in app.stream(inputs, {\"recursion_limit\": 30}):\n",
    "        for key, value in output.items():\n",
    "            pprint(f\"Node '{key}':\")\n",
    "            # Optional: print full state at each node\n",
    "            # pprint(value[\"keys\"], indent=2, width=80, depth=None)\n",
    "        pprint(\"\\n---\\n\")\n",
    "except Exception as e:\n",
    "    # The graph can stop early, e.g. GraphRecursionError when the\n",
    "    # recursion limit is hit before a grounded answer is found.  Report it\n",
    "    # instead of crashing so the partial trace above stays useful.\n",
    "    print(f\"Graph execution stopped early: {e}\")\n",
    "\n",
    "# Final generation (guarded: the stream may end without producing one).\n",
    "if value and \"generation\" in value:\n",
    "    pprint(value[\"generation\"])\n",
    "else:\n",
    "    pprint(\"No generation produced.\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
